diff --git a/.github/workflows/fitbot.yml b/.github/workflows/fitbot.yml index 67325ae2ca..faef8d273d 100644 --- a/.github/workflows/fitbot.yml +++ b/.github/workflows/fitbot.yml @@ -10,7 +10,7 @@ on: env: N3FIT_MAXNREP: 20 # total number of replicas to fit POSTFIT_NREP: 16 # requested replicas for postfit - REFERENCE_SET: NNBOT-3e7c84220-2024-02-28 # reference set for exact results + REFERENCE_SET: NNBOT-344e6f1a9-2024-03-03 # reference set for exact results STABLE_REFERENCE_SET: NNBOT-c0f99b7b3-2024-02-28 # reference set for last tag CONDA_PY: 310 PYTHONHASHSEED: "0" diff --git a/buildmaster/oldcommondata_porter.py b/buildmaster/oldcommondata_porter.py index c89916601d..5e31149dc1 100644 --- a/buildmaster/oldcommondata_porter.py +++ b/buildmaster/oldcommondata_porter.py @@ -94,7 +94,7 @@ def create_uncertainties(df, systype_file, is_default=False, use_multiplicative= if info["treatment"] not in ["ADD", "MULT"]: raise ValueError(f"Treatment type: {info['treatment']} not recognized") if use_multiplicative: - tmp[key] = float(bin_data[2 * n+1]*data[idx-1]/100.0) + tmp[key] = float(bin_data[2 * n + 1] * data[idx - 1] / 100.0) else: tmp[key] = float(bin_data[2 * n]) bins.append(tmp) @@ -340,16 +340,9 @@ def convert_from_old_to_new(dsname, new_info, overwrite=False, dry=False, keep_e metadata = safe_load(metadata_path.read_text()) # Perform sanity checks nnpdf_md = metadata["nnpdf_metadata"] - try: - assert nnpdf_md["experiment"] == plotting_dict["experiment"] - assert nnpdf_md["nnpdf31_process"] == plotting_dict["nnpdf31_process"] - assert metadata.get("setname") == set_name - except AssertionError: - print(traceback.format_exc()) - # If this fails, inspect - import ipdb - - ipdb.set_trace() + assert nnpdf_md["experiment"] == plotting_dict["experiment"] + assert nnpdf_md["nnpdf31_process"] == plotting_dict["nnpdf31_process"] + assert metadata.get("setname") == set_name # Check whether the observable already exists already_implemented = [i["observable_name"] for i in metadata["implemented_observables"]] if obs_name in already_implemented: diff --git a/doc/sphinx/source/tutorials/closuretest.md b/doc/sphinx/source/tutorials/closuretest.md index 1624a2ed46..6136f58717 100644 --- a/doc/sphinx/source/tutorials/closuretest.md +++ b/doc/sphinx/source/tutorials/closuretest.md @@ -171,13 +171,10 @@ the closure test settings modified as shown between running a closure fit in ``n3fit`` and a standard fit is that the user is required to run ``vp-setupfit`` on the runcard before running ``n3fit``. This is because the filtering of the data is required to generate the pseudodata central -values. The filtered data should then be rebuilt before the fit, so there is no -risk of the fit crashing due to multiple replicas rebuilding the data -simultaneously. The workflow is as follows: +values. 
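+(Note: the ``vp-rebuild-data`` step that was previously required here is no longer
+needed; the command has been removed.)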
The workflow is as follows: ```bash $ vp-setupfit fitname.yml -$ vp-rebuild-data fitname $ n3fit fitname.yml ``` diff --git a/extra_tests/regression_fits/central.yml b/extra_tests/regression_fits/central.yml index c24c5eb442..6aae22b822 100644 --- a/extra_tests/regression_fits/central.yml +++ b/extra_tests/regression_fits/central.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -31,7 +31,7 @@ theory: genrep: False # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 load: "weights.h5" #save: "weights.h5" @@ -69,12 +69,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/diagonal.yml b/extra_tests/regression_fits/diagonal.yml index 84d8e17770..e6780c2ebf 100644 --- a/extra_tests/regression_fits/diagonal.yml +++ b/extra_tests/regression_fits/diagonal.yml @@ -12,9 +12,9 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} ############################################################ datacuts: @@ -69,12 +69,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/feature_scaling.yml b/extra_tests/regression_fits/feature_scaling.yml index ac8b404cd3..a0cb3247dd 100644 --- a/extra_tests/regression_fits/feature_scaling.yml +++ b/extra_tests/regression_fits/feature_scaling.yml @@ -12,9 +12,9 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + 
- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -30,7 +30,7 @@ theory: genrep: True # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 load: "weights_feature.h5" @@ -68,12 +68,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/flavour.yml b/extra_tests/regression_fits/flavour.yml index 1f38ba2c8f..5011983237 100644 --- a/extra_tests/regression_fits/flavour.yml +++ b/extra_tests/regression_fits/flavour.yml @@ -12,8 +12,8 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} ############################################################ datacuts: @@ -29,7 +29,7 @@ theory: genrep: True # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 load: "weights_flavour.h5" @@ -67,12 +67,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/no_lagrange.yml b/extra_tests/regression_fits/no_lagrange.yml index e081869777..35aa1cd905 100644 --- a/extra_tests/regression_fits/no_lagrange.yml +++ b/extra_tests/regression_fits/no_lagrange.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -31,7 +31,7 @@ theory: genrep: False # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 load: "weights.h5" #save: "weights.h5" diff --git a/extra_tests/regression_fits/no_msr.yml b/extra_tests/regression_fits/no_msr.yml index 0ca126c5c5..206df8ea20 100644 --- 
a/extra_tests/regression_fits/no_msr.yml +++ b/extra_tests/regression_fits/no_msr.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -70,12 +70,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/no_sumrules.yml b/extra_tests/regression_fits/no_sumrules.yml index 25ef4868e4..2d872e617b 100644 --- a/extra_tests/regression_fits/no_sumrules.yml +++ b/extra_tests/regression_fits/no_sumrules.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -70,12 +70,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/no_vsr.yml b/extra_tests/regression_fits/no_vsr.yml index 2aa16e5994..4f2d05b468 100644 --- a/extra_tests/regression_fits/no_vsr.yml +++ b/extra_tests/regression_fits/no_vsr.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: 
ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -70,12 +70,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/normal_fit.yml b/extra_tests/regression_fits/normal_fit.yml index e8978fb876..258d626626 100644 --- a/extra_tests/regression_fits/normal_fit.yml +++ b/extra_tests/regression_fits/normal_fit.yml @@ -6,8 +6,8 @@ description: n3fit regression test, normal fit without initial weights # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} ############################################################ datacuts: @@ -23,7 +23,7 @@ theory: genrep: True # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 save: "weights.h5" @@ -60,12 +60,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/extra_tests/regression_fits/trainable_prepro.yml b/extra_tests/regression_fits/trainable_prepro.yml index 155d6f5a98..2c37c50019 100644 --- a/extra_tests/regression_fits/trainable_prepro.yml +++ b/extra_tests/regression_fits/trainable_prepro.yml @@ -12,10 +12,10 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } -- { dataset: ATLASTTBARTOT8TEV, frac: 1.0, cfac: ['QCD'] } + - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy} + - {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy} + - {dataset: CMS_Z0_8TEV_PT-Y, frac: 0.5, cfac: [QCD], variant: legacy_10} + - {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 1.0, cfac: [QCD], variant: legacy} ############################################################ datacuts: @@ -31,7 +31,7 @@ theory: genrep: True # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 load: "weights.h5" #save: "weights.h5" @@ -69,12 +69,12 @@ fitting: ############################################################ positivity: posdatasets: - - { dataset: POSF2U, maxlambda: 1e6 } # Positivity Lagrange Multiplier - - { dataset: POSDYS, maxlambda: 1e5 } + - {dataset: NNPDF_POS_5GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_5GEV_DYS, maxlambda: 1e5} integrability: integdatasets: - - 
{dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} ############################################################ debug: true diff --git a/n3fit/src/n3fit/scripts/evolven3fit_new.py b/n3fit/src/n3fit/scripts/evolven3fit_new.py index 004f6fa52c..1423fd1e39 100644 --- a/n3fit/src/n3fit/scripts/evolven3fit_new.py +++ b/n3fit/src/n3fit/scripts/evolven3fit_new.py @@ -35,7 +35,9 @@ def construct_eko_parser(subparsers): "-p", "--x-grid-points", default=None, type=int, help="Number of points of the x-grid" ) parser.add_argument( - "--legacy40", action="store_true", help="Use evolution grid used in NNPDF4.0 (for reproducibility)" + "--legacy40", + action="store_true", + help="Use evolution grid used in NNPDF4.0 (for reproducibility)", ) return parser @@ -166,12 +168,7 @@ def main(): ) elif args.actions == "produce_eko_photon": tcard, opcard = eko_utils.construct_eko_photon_cards( - args.theoryID, - args.q_fin, - x_grid, - args.q_gamma, - op_card_info, - theory_card_info, + args.theoryID, args.q_fin, x_grid, args.q_gamma, op_card_info, theory_card_info ) runner.solve(tcard, opcard, args.dump) diff --git a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml index e85331f756..da53408bc4 100644 --- a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml +++ b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml @@ -12,9 +12,9 @@ description: n3fit regression test # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: -- { dataset: NMC, frac: 0.5 } -- { dataset: SLACP, frac: 0.5} -- { dataset: CMSZDIFF12, frac: 0.5, cfac: ['QCD'], sys: 10 } +- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.5, variant: legacy } +- { dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.5, variant: legacy } +- { dataset: CMS_WP_7TEV_ELECTRON_ASY, frac: 0.5} ############################################################ datacuts: @@ -74,16 +74,16 @@ kfold: threshold: 2.0 partitions: - datasets: - - NMC + - NMC_NC_NOTFIXED_P_EM-SIGMARED - datasets: - - SLACP - - CMSZDIFF12 + - SLAC_NC_NOTFIXED_P_EM-F2 + - CMS_WP_7TEV_ELECTRON_ASY ############################################################ genrep: False # on = generate MC replicas, False = use real data trvlseed: 3 nnseed: 2 -mcseed: 1 +mcseed: 1 parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] diff --git a/pyproject.toml b/pyproject.toml index d681d1bfca..942e8709a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,11 +27,14 @@ packages = [ ] # Data files include = [ - # The profile is included together with the validphys package + # The default profile is included together with the validphys package "validphys2/src/validphys/nnprofile_default.yaml", - # While commondata and theory.db are separated into the datafiles folder + # Same for commondata and theory.db "validphys2/src/validphys/datafiles/commondata/*", "validphys2/src/validphys/datafiles/theory.db", + # From the new commondata we are only interested on top-level yaml files + "validphys2/src/validphys/datafiles/new_commondata/*/*.yaml", + "validphys2/src/validphys/datafiles/new_commondata/dataset_names.yml", # The version file is ignored by git so it needs to be explicitly included "validphys2/src/validphys/_version.py" ] @@ -51,7 +54,6 @@ vp-get = "validphys.scripts.vp_get:main" vp-comparefits = "validphys.scripts.vp_comparefits:main" vp-fitrename = "validphys.scripts.vp_fitrename:main" vp-checktheory = 
"validphys.scripts.vp_checktheory:main" -vp-rebuild-data = "validphys.scripts.vp_rebuild_data:main" vp-pdfrename = "validphys.scripts.vp_pdfrename:main" vp-pdffromreplicas = "validphys.scripts.vp_pdffromreplicas:main" vp-list = "validphys.scripts.vp_list:main" diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 03191a4e99..5380ae18c6 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -1,16 +1,84 @@ """ -This module implements parsers for commondata and systype files into useful -datastructures, contained in the :py:mod:`validphys.coredata` module. +This module implements parsers for commondata and its associated metadata and uncertainties files +into useful structures that can be fed to the main :py:class:`validphys.coredata.CommonData` class. -The validphys commondata structure is an instance of :py:class:`validphys.coredata.CommonData` +A CommonData file is completely defined by a dataset name +(which defines the folder in which the information is) +and observable name (which defines the specific data, fktables and plotting settings to read). + +__{_}_ + +Where the folder name is ``__{_}`` + +The definition of all information for a given dataset (and all its observable) is in the +``metadata.yaml`` file and its ``implemented_observables``. + + +This module defines a number of parsers using the ``validobj`` library. + +The full ``metadata.yaml`` is read as a ``SetMetaData`` object +which contains a list of ``ObservableMetaData``. +These ``ObservableMetaData`` are the "datasets" of NNPDF for all intents and purposes. +The parent ``SetMetaData`` collects some shared variables such as the version of the dataset, +arxiv, inspire or hepdata ids, the folder in which the data is, etc. + +The main class in this module is thus ``ObservableMetaData`` which holds _all_ information +about the particular dataset-observable that we are interested in (and a reference to its parent). + +Inside the ``ObservableMetaData`` we can find: + - ``TheoryMeta``: contains the necessary information to read the (new style) fktables + - ``KinematicsMeta``: containins metadata about the kinematics + - ``PlottingOptions``: plotting style and information for validphys + - ``Variant``: variant to be used + +The CommonMetaData defines how the CommonData file is to be loaded, +by modifying the CommonMetaData using one of the loaded Variants one can change the resulting +:py:class:`validphys.coredata.CommonData` object. """ import dataclasses +from functools import cached_property, lru_cache import logging from operator import attrgetter +from pathlib import Path +from typing import Any, Dict, Optional +import numpy as np import pandas as pd +from validobj import ValidationError, parse_input +from validobj.custom import Parser + +# We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml +# of some versions which are pinned in some of the conda packages we use... 
+from reportengine.compat import yaml
+from validphys.coredata import KIN_NAMES, CommonData
+from validphys.datafiles import new_to_legacy_map, path_commondata
+from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions
+from validphys.process_options import ValidProcess
+from validphys.utils import parse_yaml_inp
+
+try:
+    # If libyaml is available, use the C loader to speed up some of the reads
+    # https://pyyaml.org/wiki/LibYAML
+    # libyaml is available for most linux distributions
+    Loader = yaml.CLoader
+except AttributeError:
+    # fallback to the slow loader
+    Loader = yaml.Loader
+
+
+def _quick_yaml_load(filepath):
+    return yaml.load(filepath.read_text(encoding="utf-8"), Loader=Loader)
-from validphys.coredata import CommonData
+
+# JCM:
+# Some notes for developers
+# The usage of `frozen` in the definitions of the dataclasses is not strictly necessary;
+# however, changing the metadata can have side effects in many parts of validphys.
+# By freezing the overall class (and leaving only specific attributes unfrozen) we have more
+# granular control. Please use setters to modify a frozen class instead of removing frozen.
+
+EXT = "pineappl.lz4"
+_INDEX_NAME = "entry"
 log = logging.getLogger(__name__)
@@ -28,6 +96,7 @@
     "EWK_PT": ("$p_T$ (GeV)", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
     "EWK_PTRAP": ("$\\eta/y$", "$p_T^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
     "EWK_RAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
+    "EWK_RAP_ASY": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
     "HIG_RAP": ("$y$", "$M_H^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
     "HQP_MQQ": ("$M^{QQ} (GeV)$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
     "HQP_PTQ": ("$p_T^Q (GeV)$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s} (GeV)$"),
@@ -40,22 +109,856 @@
     "SIA": ("$z$", "$Q^2 (GeV^2)$", "$y$"),
 }
+PROCESS_DESCRIPTION_LABEL = {
+    "EWJ_JRAP": "Jet Rapidity Distribution",
+    "EWK_RAP": "Drell-Yan Rapidity Distribution",
+    "EWJ_RAP": "Jet Rapidity Distribution",
+    "HQP_PTQ": "Heavy Quarks Production Single Quark Transverse Momentum Distribution",
+    "JET": "Jets Rapidity Distribution",
+    "HIG_RAP": "Higgs Rapidity Distribution",
+    "HQP_YQ": "Heavy Quarks Production Single Quark Rapidity Distribution",
+    "EWJ_JPT": "Jet Transverse Momentum Distribution",
+    "DIS": "Deep Inelastic Scattering",
+    "HQP_PTQQ": "Heavy Quarks Production Transverse Momentum Distribution",
+    "EWK_PT": "Drell-Yan Transverse Momentum Distribution",
+    "EWJ_PT": "Jet Transverse Momentum Distribution",
+    "PHT": "Photon Production",
+    "HQP_MQQ": "Heavy Quarks Production Mass Distribution",
+    "EWK_PTRAP": "Drell-Yan Transverse Momentum Distribution",
+    "HQP_YQQ": "Heavy Quarks Production Rapidity Distribution",
+    "INC": "Heavy Quarks Total Cross Section",
+    "EWJ_MLL": "Jet Mass Distribution",
+    "EWK_MLL": "Drell-Yan Mass Distribution",
+    "DIJET": "Dijets Invariant Mass and Rapidity Distribution",
+    "DYP": "Fixed-Target Drell-Yan",
+}
+
+
+def _get_ported_kinlabel(process_type):
+    """Get the kinematic label for ported datasets
+    In principle there is a one to one correspondence between the process label in the kinematics
+    and ``KINLABEL_LATEX``; however, some special cases need to be taken into account
+    """
+    process_type = str(process_type)
+    if process_type in KINLABEL_LATEX:
+        return KINLABEL_LATEX[process_type]
+    # special case in which the process is DIS- or DYP-like
+    if process_type[:3] in ("DIS", "DYP"):
+        return _get_ported_kinlabel(process_type[:3])
+    if len(process_type.split("_")) > 1:
+        return _get_ported_kinlabel(process_type.rsplit("_", 1)[0])
+    raise KeyError(f"Label {process_type} not recognized in KINLABEL_LATEX")
+
+
+def _get_process_description(process_type):
+    """Get the process description string for a given process type
+    Similarly to kinlabel, some special cases are taken into account.
+    """
+    try:
+        return process_type.description
+    except AttributeError:
+        # This process needs to be updated
+        pass
+
+    if process_type in PROCESS_DESCRIPTION_LABEL:
+        return PROCESS_DESCRIPTION_LABEL[process_type]
+    # If not, is this a DYP- or DIS-like dataset?
+    if process_type[:3] in ("DIS", "DYP"):
+        return _get_process_description(process_type[:3])
+    # Remove pieces of "_" until it is found
+    if len(process_type.split("_")) > 1:
+        return _get_process_description(process_type.rsplit("_", 1)[0])
+    raise KeyError(f"Label {process_type} not found in PROCESS_DESCRIPTION_LABEL")
+
+
+@Parser
+def ValidPath(path_str: str) -> Path:
+    """Parse strings into paths"""
+    return Path(path_str)
+
+
+### Theory metadata
+@Parser
+def ValidOperation(op_str: Optional[str]) -> str:
+    """Ensures that the operation defined in the commondata file is implemented in validphys"""
+    if op_str is None:
+        op_str = "NONE"
+    ret = op_str.upper()
+    # TODO: move accepted operations to this module so that the convolution receives an operation to apply
+    # instead of an operation to understand
+    from validphys.convolution import OP
+
+    if ret not in OP:
+        raise ValidationError(f"The operation '{op_str}' is not implemented in validphys")
+    return str(ret)
+
+
+@dataclasses.dataclass
+class ValidApfelComb:
+    # TODO: to be removed
+    repetition_flag: Optional[list[str]] = None
+    normalization: Optional[dict] = None
+    shifts: Optional[dict] = None
+
+
+@dataclasses.dataclass(frozen=True)
+class TheoryMeta:
+    """Contains the necessary information to load the associated fktables
+
+    The theory metadata must always contain a key ``FK_tables`` which defines
+    the fktables to be loaded.
+    The ``FK_tables`` is organized as a double list such that:
+
+    The inner list is concatenated: in practice these are different fktables that
+    might refer to the same observable but are divided into subgrids for practical reasons.
+    The outer list contains the operands for whatever operation needs to be computed
+    in order to match the experimental data.
+
+    In addition there are other flags that can affect how the fktables are read or used:
+    - operation: defines the operation to apply to the outer list
+    - shifts: mapping with the single fktables and their respective shifts,
+      useful to create "gaps" so that the fktables and the respective experimental data
+      are ordered in the same way (for instance, when some points are missing from a grid)
+
+    This class is immutable; what is read from the commondata metadata should be considered final
+
+    Example
+    -------
+    >>> from validphys.commondataparser import TheoryMeta
+    ... from validobj import parse_input
+    ... from reportengine.compat import yaml
+    ... theory_raw = '''
+    ... FK_tables:
+    ...   - - fk1
+    ...   - - fk2
+    ...     - fk3
+    ... operation: ratio
+    ... apfelcomb:
+    ...   repetition_flag:
+    ...     - fk3
+    ... '''
+    ... theory = yaml.safe_load(theory_raw)
+    ... parse_input(theory, TheoryMeta)
+    TheoryMeta(FK_tables=[['fk1'], ['fk2', 'fk3']], operation='RATIO', shifts=None, conversion_factor=1.0, comment=None, apfelcomb=ValidApfelComb(repetition_flag=['fk3'], normalization=None))
+
+    """
+
+    FK_tables: list[tuple]
+    operation: ValidOperation = "NULL"
+    conversion_factor: float = 1.0
+    shifts: Optional[dict] = None
+
+    comment: Optional[str] = None
+
+    # TODO: `apfelcomb` is transitional and will eventually be removed
+    apfelcomb: Optional[ValidApfelComb] = None
+
+    def __post_init__(self):
+        """If a ``shifts`` flag is found in the apfelcomb object, move it outside"""
+        if self.apfelcomb is not None:
+            log.warning(
+                f"Apfelcomb key is being used to read {self.FK_tables}, please update the commondata file"
+            )
+            if self.apfelcomb.shifts is not None and self.shifts is None:
+                object.__setattr__(self, 'shifts', self.apfelcomb.shifts)
+                self.apfelcomb.shifts = None
+
+    def fktables_to_paths(self, grids_folder):
+        """Given a source for pineappl grids, constructs the lists of fktables
+        to be loaded"""
+        ret = []
+        for operand in self.FK_tables:
+            ret.append([grids_folder / f"{m}.{EXT}" for m in operand])
+        return ret
+
+    @classmethod
+    def parser(cls, yaml_file):
+        """The yaml databases in the server use "operands" as key instead of "FK_tables" """
+        if not yaml_file.exists():
+            raise FileNotFoundError(yaml_file)
+        meta = yaml.safe_load(yaml_file.read_text())
+        # Make sure the operations are upper-cased for compound-compatibility
+        meta["operation"] = "NULL" if meta["operation"] is None else meta["operation"].upper()
+        if "operands" in meta:
+            meta["FK_tables"] = meta.pop("operands")
+        return parse_input(meta, cls)
+
+    def __hash__(self):
+        """Include in the hash any piece of information that can change the
+        definition of the theory for a given dataset, for functions using a cache"""
+        to_be_hashed = [self.operation, self.conversion_factor]
+        to_be_hashed.append(tuple([tuple(i) for i in self.FK_tables]))
+        if self.shifts is not None:
+            to_be_hashed.append(tuple(self.shifts.keys()))
+            to_be_hashed.append(tuple(self.shifts.values()))
+        return hash(tuple(to_be_hashed))
+
+
+## Theory end
+
+
+@dataclasses.dataclass(frozen=True)
+class Variant:
+    """The new commondata format allows the usage of variants
+    A variant can overwrite a number of keys, as defined by this dataclass
+    """
+
+    data_uncertainties: Optional[list[ValidPath]] = None
+    theory: Optional[TheoryMeta] = None
+    data_central: Optional[ValidPath] = None
+
+
+ValidVariants = Dict[str, Variant]
+
+
+### Kinematic data
+@dataclasses.dataclass(frozen=True)
+class ValidVariable:
+    """Defines the variables"""
+
+    label: str
+    description: str = ""
+    units: str = ""
+
+    def full_label(self):
+        if self.units:
+            return f"{self.label} ({self.units})"
+        return self.label
+
+    def apply_label(self, value):
+        """Return a string formatted as label = value (units)"""
+        tmp = f"{self.label} = {value}"
+        if self.units:
+            tmp += f" ({self.units})"
+        return tmp
+
+
+@dataclasses.dataclass(frozen=True)
+class ValidKinematics:
+    """Contains the metadata necessary to load the kinematics of the dataset.
+    The variables should be a dictionary with the key naming the variable
+    and the content complying with the ``ValidVariable`` spec.
+
+    Only the kinematics defined by the key ``kinematic_coverage`` will be loaded,
+    which must be three.
+
+    Three shall be the number of the counting and the number of the counting shall be three.
+ Four shalt thou not count, neither shalt thou count two, + excepting that thou then proceedeth to three. + Once the number three, being the number of the counting, be reached, + then the kinematics be loaded in the direction of thine validobject. + """ + + file: ValidPath + variables: Dict[str, ValidVariable] + + def get_label(self, var): + """For the given variable, return the label as label (unit) + If the label is an "extra" return the last one + """ + if var.startswith("extra_"): + return list(self.variables.values())[-1] + return self.variables[var].full_label() + + def apply_label(self, var, value): + """For a given value for a given variable, return the labels + as label = value (unit) + If the variable is not included in the list of variables, returns None + as the variable could've been transformed by a kinematic transformation + """ + if var not in self.variables: + return None + return self.variables[var].apply_label(value) + + +### kinematics end + + +### Observable and dataset definitions +@dataclasses.dataclass(frozen=True, eq=True) +class ObservableMetaData: + observable_name: str + observable: dict + ndata: int + # Plotting options + plotting: PlottingOptions + process_type: ValidProcess + kinematic_coverage: list[str] + + # Data itself + kinematics: ValidKinematics + data_uncertainties: list[ValidPath] + + # The central data is optional _only_ for + # positivity datasets, and will be checked as soon as the class is instantiated + data_central: Optional[ValidPath] = None + + # Optional data + theory: Optional[TheoryMeta] = None + tables: Optional[list] = dataclasses.field(default_factory=list) + npoints: Optional[list] = dataclasses.field(default_factory=list) + variants: Optional[ValidVariants] = dataclasses.field(default_factory=dict) + applied_variant: Optional[str] = None + ported_from: Optional[str] = None + + # Derived quantities: + # Note that an observable without a parent will fail in many different ways + _parent: Optional[Any] = None + + def __post_init__(self): + """ + Small modifications for better compatibility with the rest of validphys + """ + # Since vp will rely on the kinematics being 3 variables, + # fill the extra with whatever can be found in the kinematics dictionary + # otherwise just fill with extra_x + if len(self.kinematic_coverage) < 3: + unused = list(set(self.kinematics.variables) - set(self.kinematic_coverage)) + diff_to_3 = 3 - len(self.kinematic_coverage) + if unused: + nkincov = self.kinematic_coverage + unused[diff_to_3:] + else: + nkincov = self.kinematic_coverage + [f"extra_{i}" for i in range(diff_to_3)] + object.__setattr__(self, 'kinematic_coverage', nkincov) + + def __hash__(self): + """ObservableMetaData is defined by: + - the setname + - the variant used + - the data + """ + return hash((self.name, self.applied_variant, self.data_central)) + + def check(self): + """Various checks to apply manually to the observable before it is used anywhere + These are not part of the __post_init__ call since they can only happen after the metadata + has been read, the observable selected and (likely) variants applied. + """ + # Check whether the data central or the uncertainties are empty for a non-positivity/integrability set + if not self.is_lagrange_multiplier: + if self.data_central is None: + raise ValidationError(f"Missing `data_central` field for {self.name}") + + if not self.data_uncertainties: + ermsg = f"Missing `data_uncertainties` for {self.name}." 
+                # be polite
+                if "legacy" in self.variants:
+                    ermsg += " Maybe you intended to use `variant: legacy`?"
+                raise ValidationError(ermsg)
+
+        # Check that plotting.plot_x is being filled
+        if self.plotting.plot_x is None:
+            ermsg = f"No variable selected as x-axis in the plot for {self.name}. Please add `plotting::plot_x`."
+            if self.plotting.x is not None:
+                ermsg += " Please replace `plotting::x` with `plotting::plot_x`."
+            raise ValidationError(ermsg)
+
+        # Ensure that all variables in the kinematic coverage exist
+        for var in self.kinematic_coverage:
+            if var not in self.kinematics.variables:
+                raise ValidationError(
+                    f"Variable {var} is in `kinematic_coverage` but not included in `kinematics` for {self.name}"
+                )
+
+        if len(self.kinematic_coverage) > 3:
+            raise ValidationError(
+                "Only a maximum of 3 variables can be used for `kinematic_coverage`"
+            )
+
+    def apply_variant(self, variant_name):
+        """Return a new instance of this class with the variant applied
+
+        This method also defines how the variant is applied to the commondata
+        """
+        try:
+            variant = self.variants[variant_name]
+        except KeyError as e:
+            raise ValueError(f"The requested variant does not exist: {variant_name}") from e
+
+        variant_replacement = {}
+        if variant.data_uncertainties is not None:
+            variant_replacement["data_uncertainties"] = variant.data_uncertainties
+        if variant.theory is not None:
+            variant_replacement["theory"] = variant.theory
+        if variant.data_central is not None:
+            variant_replacement["data_central"] = variant.data_central
+
+        return dataclasses.replace(self, applied_variant=variant_name, **variant_replacement)
+
+    @property
+    def is_positivity(self):
+        return self.setname.startswith("NNPDF_POS")
+
+    @property
+    def is_integrability(self):
+        return self.setname.startswith("NNPDF_INTEG")
+
+    @property
+    def is_lagrange_multiplier(self):
+        return self.is_positivity or self.is_integrability
+
+    @property
+    def path_data_central(self):
+        return self._parent.folder / self.data_central
+
+    def load_data_central(self):
+        """Loads the data for this commondata and returns a dataframe
+
+        Returns
+        -------
+        pd.DataFrame
+            a dataframe containing the data
+        """
+        if self.is_lagrange_multiplier:
+            data = np.zeros(self.ndata)
+        else:
+            datayaml = _quick_yaml_load(self.path_data_central)
+            data = datayaml["data_central"]
+
+        if len(data) != self.ndata:
+            raise ValueError(
+                f"The number of bins in {self.path_data_central} does not match ndata={self.ndata}"
+            )
+
+        data_df = pd.DataFrame(data, index=range(1, self.ndata + 1), columns=["data"])
+        data_df.index.name = _INDEX_NAME
+        return data_df
+
+    @property
+    def paths_uncertainties(self):
+        return [self._parent.folder / i for i in self.data_uncertainties]
+
+    def load_uncertainties(self):
+        """Returns a dataframe with all appropriate uncertainties
+
+        Returns
+        -------
+        pd.DataFrame
+            a dataframe containing the uncertainties
+        """
+        if self.is_lagrange_multiplier:
+            return pd.DataFrame([{}] * self.ndata, index=range(1, self.ndata + 1))
+
+        all_df = []
+        for ufile in self.paths_uncertainties:
+            uncyaml = _quick_yaml_load(ufile)
+
+            mindex = pd.MultiIndex.from_tuples(
+                [(k, v["treatment"], v["type"]) for k, v in uncyaml["definitions"].items()],
+                names=["name", "treatment", "type"],
+            )
+            bin_list = pd.DataFrame(uncyaml["bins"]).values.astype(float)
+            if len(bin_list) != self.ndata:
+                raise ValueError(f"The number of bins in {ufile} does not match ndata={self.ndata}")
+
+            # I'm guessing there will be a better way of doing this than calling dataframe twice for the same thing?
+            final_df = pd.DataFrame(bin_list, columns=mindex, index=range(1, self.ndata + 1))
+            final_df.index.name = _INDEX_NAME
+            all_df.append(final_df)
+        return pd.concat(all_df, axis=1)
+
+    @property
+    def path_kinematics(self):
+        return self._parent.folder / self.kinematics.file
+
+    def load_kinematics(self, fill_to_three=True, drop_minmax=True):
+        """Returns a dataframe with the kinematic information
+
+        Parameters
+        ----------
+        fill_to_three: bool
+            ensure that there are always three columns (repeat the last one) in the kinematics
+
+        drop_minmax: bool
+            Drop the min and max value, necessary for legacy comparisons
+
+        Returns
+        -------
+        pd.DataFrame
+            a dataframe containing the kinematics
+        """
+        kinematics_file = self.path_kinematics
+        kinyaml = _quick_yaml_load(kinematics_file)
+
+        kin_dict = {}
+        for bin_index, dbin in enumerate(kinyaml["bins"], start=1):
+            for d in dbin.values():
+                if d["mid"] is None:
+                    d["mid"] = 0.5 * (d["max"] + d["min"])
+
+                if drop_minmax:
+                    # TODO: for now we are dropping min/max information since it didn't exist in the past
+                    d["min"] = None
+                    d["max"] = None
+                else:
+                    # If we are not dropping it, ensure that it has something!
+                    d["min"] = d["min"] if d.get("min") is not None else d["mid"]
+                    d["max"] = d["max"] if d.get("max") is not None else d["mid"]
+
+            # The old commondata always had 3 kinematic variables and the code sometimes
+            # relies on this fact
+            # Add a fake one at the end repeating the last one
+            if fill_to_three and (ncol := len(dbin)) < 3:
+                for i in range(3 - ncol):
+                    dbin[f"extra_{i}"] = d
+
+            kin_dict[bin_index] = pd.DataFrame(dbin).stack()
+
+        if len(kin_dict) != self.ndata:
+            raise ValueError(
+                f"The number of bins in {kinematics_file} does not match ndata={self.ndata}"
+            )
+
+        return pd.concat(kin_dict, axis=1, names=[_INDEX_NAME]).swaplevel(0, 1).T
+
+    # Properties inherited from parent
+    @property
+    def nnpdf_metadata(self):
+        return self._parent.nnpdf_metadata
+
+    @property
+    def setname(self):
+        return self._parent.setname
+
+    @property
+    def experiment(self):
+        return self.setname.split("_")[0]
+
+    @property
+    def process(self):
+        return self.setname.split("_")[1]
+
+    @property
+    def cm_energy(self):
+        return self._parent.cm_energy
+
+    @property
+    def name(self):
+        return f"{self.setname}_{self.observable_name}"
+
+    @property
+    def is_ported_dataset(self):
+        """Return True if this is an automatically ported dataset that has not been updated"""
+        if self.ported_from is None:
+            return False
+
+        # If it is using a legacy variant and has a ported_from field, then it is a ported one
+        if self.applied_variant is not None and self.applied_variant.startswith("legacy"):
+            return True
+
+        # If not using a legacy variant, we consider it ported if the kin variables are still k1,k2,k3
+        return {"k1", "k2", "k3"} == set(self.kinematic_coverage)
+
+    @property
+    def kinlabels(self):
+        """Return the kinematic labels in the same order as they are set
+        in ``kinematic_coverage``
+        If this is a ported dataset, rely on the process type using the legacy labels
+        """
+        if self.is_ported_dataset:
+            return _get_ported_kinlabel(self.process_type)
+        return [self.kinematics.get_label(i) for i in self.kinematic_coverage]
+
+    def digest_plotting_variable(self, variable):
+        """Digest plotting variables in the ``line_by`` or ``figure_by`` fields
+        and return the appropriate ``kX`` or other label such that the plotting functions
+        of validphys can understand it.
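+        For instance, the second variable listed in ``kinematic_coverage`` is digested to ``k2``.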
+ + These might be variables included as part of the kinematics or extra labels + defined in the plotting dictionary. + """ + # If it is part of the coverage, just return the relevant KN + if variable in self.kinematic_coverage: + fig_idx = self.kinematic_coverage.index(variable) + return f"k{fig_idx + 1}" + + # If it is not in the coverage, it might be a _known_ extra label + if self.plotting.extra_labels is not None and variable in self.plotting.extra_labels: + # In that case return it raw + return variable + + # Or, it might be a variable that VP knows how to deal with automagically + if variable in labeler_functions: + return variable + + raise ValueError(f"Don't know what to do with plotting variable {variable} for {self.name}") + + def _plotting_options_set(self): + """Set and return the PlottingOptions metadata + + Fill in missing information that can be learnt from the other variables (xlabel/ylabel) + or that is shared by the whole dataset. + """ + if self.plotting.already_digested: + return self.plotting + + if self.plotting.nnpdf31_process is None: + self.plotting.nnpdf31_process = self.nnpdf_metadata["nnpdf31_process"] + + if self.plotting.experiment is None: + self.plotting.experiment = self.nnpdf_metadata["experiment"] + + if self.plotting.process_description is None: + self.plotting.process_description = _get_process_description(self.process_type) + + ## Swap variables by the k_idx + # Internally validphys takes the x/y to be "k1" "k2" or "k3" + # Therefore, for the time being, swap the actual keys by k1/k2/k3 + try: + x_idx = self.kinematic_coverage.index(self.plotting.plot_x) + self.plotting.x = f"k{x_idx + 1}" + + if self.plotting.x_label is None and not self.is_ported_dataset: + self.plotting.x_label = self.kinematics.get_label(self.plotting.plot_x) + + except ValueError: + # it is possible that the x value is an "extra", if that's the case continue + self.plotting.x = self.plotting.plot_x + self.plotting.x_label = None + + # Swap the `figure_by` and `line_by` variables by k1/k2/k3 + # unless this is something coming from the "extra labels" + if self.plotting.figure_by is not None: + new_fig_by = [] + for var in self.plotting.figure_by: + new_fig_by.append(self.digest_plotting_variable(var)) + self.plotting.figure_by = new_fig_by + + if self.plotting.line_by is not None: + new_line_by = [] + for var in self.plotting.line_by: + new_line_by.append(self.digest_plotting_variable(var)) + self.plotting.line_by = new_line_by + + self.plotting.already_digested = True + return self.plotting + + @cached_property + def plotting_options(self): + try: + return self._plotting_options_set() + except Exception as e: + # There are many chances for failure here + log.error(f"Failure for: {self.name}") + raise e + + +@dataclasses.dataclass(frozen=True) +class ValidReference: + """Holds literature information for the dataset""" + + url: str + version: Optional[int] = None + journal: Optional[str] = None + tables: list[int] = dataclasses.field(default_factory=list) + + +@dataclasses.dataclass(frozen=True) +class SetMetaData: + """Metadata of the whole set""" + + setname: str + version: int + version_comment: str + nnpdf_metadata: dict + implemented_observables: list[ObservableMetaData] + arXiv: Optional[ValidReference] = None + iNSPIRE: Optional[ValidReference] = None + hepdata: Optional[ValidReference] = None + + @property + def folder(self): + return path_commondata / self.setname + + @property + def cm_energy(self): + """Return the center of mass energy as GeV if it can be understood from 
the name
+        otherwise return None"""
+        energy_string = self.setname.split("_")[2]
+        if energy_string == "NOTFIXED":
+            return None
+        if energy_string.endswith("GEV"):
+            factor = 1.0
+        elif energy_string.endswith("TEV"):
+            factor = 1000
+        else:
+            return None
+        return float(energy_string[:-3].replace("P", ".")) * factor
+
+    @cached_property
+    def allowed_observables(self):
+        """
+        Returns the implemented observables as a {observable_name.upper(): observable} dictionary
+        """
+        return {o.observable_name.upper(): o for o in self.implemented_observables}
+
+    def select_observable(self, obs_name_raw):
+        """Check whether the observable is implemented and return said observable"""
+        obs_name = obs_name_raw.upper()
+        try:
+            observable = self.allowed_observables[obs_name]
+        except KeyError:
+            raise ValueError(
+                f"The selected observable {obs_name_raw} does not exist in {self.setname}"
+            )
+
+        # Now burn the _parent key into the observable and apply checks
+        object.__setattr__(observable, "_parent", self)
+        return observable
+
+
+@lru_cache
+def _parse_entire_set_metadata(metadata_file):
+    """Read the metadata file"""
+    return parse_yaml_inp(metadata_file, SetMetaData)
+
+
+@lru_cache
+def parse_new_metadata(metadata_file, observable_name, variant=None):
+    """Given a metadata file in the new format and the specific observable to be read,
+    load and parse the metadata and select the observable. If any variants are selected, apply them.
+
+    The triplet (metadata_file, observable_name, variant) defines unequivocally the information
+    to be parsed from the commondata library
+    """
+    set_metadata = _parse_entire_set_metadata(metadata_file)
+
+    # Select one observable from the entire metadata
+    metadata = set_metadata.select_observable(observable_name)
+
+    # And apply variant if given
+    if variant is not None:
+        metadata = metadata.apply_variant(variant)
+
+    return metadata
+
+
+def load_commondata_new(metadata):
+    """
+
+    TODO: update this docstring since now the load_commondata_new takes the information from
+    the metadata, and the name -> split is done outside
+
+    In the current iteration of the commondata, each of the commondata
+    (i.e., an observable from a data publication) corresponds to one single observable
+    inside a folder which is named as "<experiment>_<process>_<energy>_<extra>"
+    The observable is defined by a last suffix of the form "_<observable>" so that the full name
+    of the dataset is always:
+
+    "<experiment>_<process>_<energy>{_<extra>}_<observable>"
+
+    where <extra> is optional.
+
+    This function right now works under the assumption that the folder/observable
+    is separated at the last _ so that:
+        folder_name = <experiment>_<process>_<energy>{_<extra>}
+    but note that this convention is still not fully defined.
+
+    This function returns a commondata object constructed by parsing the metadata.
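+    For example, a dataset named "ATLAS_TTBAR_8TEV_TOT_X-SEC" (one of the sets used in the
+    regression runcards above) would be split at the last underscore into the folder
+    "ATLAS_TTBAR_8TEV_TOT" and the observable name "X-SEC".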
+ + Once a variant is selected, it can no longer be changed + + Note that this function reproduces `parse_commondata` below, which parses the + _old_ file format + """ + # Before loading, apply the checks + metadata.check() + + # Now parse the data + data_df = metadata.load_data_central() + # the uncertainties + uncertainties_df = metadata.load_uncertainties() + # and the kinematics + kin_df = metadata.load_kinematics() + + # Once we have loaded all uncertainty files, let's check how many sys we have + nsys = len( + [i for i in uncertainties_df.columns.get_level_values(0) if not i.startswith("stat")] + ) + + # Backwards-compatibility + # Finally, create the commondata by merging the dataframes in the old commondata_table + + procname = metadata.process_type # nnpdf_metadata["nnpdf31_process"] + kin_df = kin_df[metadata.kinematic_coverage] + kin_df.columns = KIN_NAMES + kin_df["process"] = procname + + kin_df = kin_df[["process"] + KIN_NAMES] + + # For the uncertainties, create a simplified version to concatenate + # and save the systype information + new_columns = [] + systypes = {"treatment": [], "name": []} + for col in uncertainties_df.columns: + if col[0].startswith("stat"): + new_columns.append("stat") + else: + # if it is syst add the ADD/MULT information + new_columns.append(col[1]) + systypes["treatment"].append(col[1]) + systypes["name"].append(col[2]) + + uncertainties_df.columns = new_columns + + commondata_table = pd.concat([kin_df, data_df, uncertainties_df], axis=1) + systype_table = pd.DataFrame(systypes, index=range(1, nsys + 1)) + systype_table.index.name = "sys_index" + + # TODO: Legacy compatibility + # 1. Add a stat column if it doesn't exist + # 2. Transform multiplicative uncertainties into % as it was done in the older version + + if "stat" not in commondata_table: + commondata_table["stat"] = 0.0 + + if "MULT" in commondata_table: + commondata_table["MULT"] = commondata_table["MULT"].multiply( + 100 / commondata_table["data"], axis="index" + ) + + # TODO: For the time being, fill `legacy_name` with the new name if not found + legacy_name = metadata.name + + if (old_name := new_to_legacy_map(metadata.name, metadata.applied_variant)) is not None: + legacy_name = old_name + + return CommonData( + setname=metadata.name, + ndata=metadata.ndata, + commondataproc=procname, + nkin=3, + nsys=nsys, + commondata_table=commondata_table, + systype_table=systype_table, + legacy=False, + legacy_name=legacy_name, + kin_variables=metadata.kinematic_coverage, + ) + + +########################################### + +@lru_cache def load_commondata(spec): """ Load the data corresponding to a CommonDataSpec object. Returns an instance of CommonData """ - commondatafile = spec.datafile - setname = spec.name - systypefile = spec.sysfile + if spec.legacy: + commondatafile = spec.datafile + setname = spec.name + systypefile = spec.sysfile - commondata = parse_commondata(commondatafile, systypefile, setname) + return load_commondata_old(commondatafile, systypefile, setname) - return commondata + return load_commondata_new(spec.metadata) -def parse_commondata(commondatafile, systypefile, setname): +### Old commondata: +def load_commondata_old(commondatafile, systypefile, setname): """Parse a commondata file and a systype file into a CommonData. 
Parameters @@ -84,8 +987,8 @@ def parse_commondata(commondatafile, systypefile, setname): commondataproc = commondatatable["process"][1] # Check for consistency with commondata metadata cdmetadata = peek_commondata_metadata(commondatafile) - if (setname, nsys, ndata) != attrgetter("name", "nsys", "ndata")(cdmetadata): - raise ValueError("Commondata table information does not match metadata") + if (nsys, ndata) != attrgetter("nsys", "ndata")(cdmetadata): + raise ValueError(f"Commondata table information does not match metadata for {setname}") # Now parse the systype file systypetable = parse_systypes(systypefile) @@ -99,12 +1002,13 @@ def parse_commondata(commondatafile, systypefile, setname): nsys=nsys, commondata_table=commondatatable, systype_table=systypetable, + legacy=True, ) def parse_systypes(systypefile): """Parses a systype file and returns a pandas dataframe.""" - systypeheader = ["sys_index", "type", "name"] + systypeheader = ["sys_index", "treatment", "name"] try: systypetable = pd.read_csv( systypefile, sep=r"\s+", names=systypeheader, skiprows=1, header=None @@ -150,7 +1054,8 @@ def get_plot_kinlabels(commondata): """Return the LaTex kinematic labels for a given Commondata""" key = commondata.process_type - return KINLABEL_LATEX[key] + # TODO: the keys in KINLABEL_LATEX need to be updated for the new commondata + return KINLABEL_LATEX.get(key, key) def get_kinlabel_key(process_label): @@ -160,6 +1065,9 @@ def get_kinlabel_key(process_label): """ l = process_label try: + if process_label == "EWK_RAP_ASY": + # TODO this function is disappearing in this PR + l = "EWK_RAP" return next(k for k in sorted(KINLABEL_LATEX, key=len, reverse=True) if l.startswith(k)) except StopIteration as e: raise ValueError( diff --git a/validphys2/src/validphys/commondatawriter.py b/validphys2/src/validphys/commondatawriter.py deleted file mode 100644 index 650df84cbc..0000000000 --- a/validphys2/src/validphys/commondatawriter.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -This module contains functions to write commondata and systypes -tables to files -""" - - -def write_commondata_data(commondata, buffer): - """ - write commondata table to buffer, this can be a memory map, - compressed archive or strings (using for instance StringIO) - - - Parameters - ---------- - - commondata : validphys.coredata.CommonData - - buffer : memory map, compressed archive or strings - example: StringIO object - - - Example - ------- - >>> from validphys.loader import Loader - >>> from io import StringIO - - >>> l = Loader() - >>> cd = l.check_commondata("NMC").load_commondata_instance() - >>> sio = StringIO() - >>> write_commondata_data(cd,sio) - >>> print(sio.getvalue()) - - """ - header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" - buffer.write(header) - commondata.commondata_table.to_csv(buffer, sep="\t", header=None) - - -def write_commondata_to_file(commondata, path): - """ - write commondata table to file - """ - with open(path, "w") as file: - write_commondata_data(commondata, file) - - -def write_systype_data(commondata, buffer): - """ - write systype table to buffer, this can be a memory map, - compressed archive or strings (using for instance StringIO) - - - Parameters - ---------- - - commondata : validphys.coredata.CommonData - - buffer : memory map, compressed archive or strings - example: StringIO object - - - Example - ------- - >>> from validphys.loader import Loader - >>> from io import StringIO - - >>> l = Loader() - >>> cd = l.check_commondata("NMC").load_commondata_instance() - >>> 
-    >>> sio = StringIO()
-    >>> write_systype_data(cd,sio)
-    >>> print(sio.getvalue())
-
-    """
-    header = f"{commondata.nsys}\n"
-    buffer.write(header)
-    commondata.systype_table.to_csv(buffer, sep="\t", header=None)
-
-
-def write_systype_to_file(commondata, path):
-    """
-    write systype table to file
-    """
-    with open(path, "w") as file:
-        write_systype_data(commondata, file)
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index d59372c3a7..b262dcafd4 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -26,6 +26,7 @@
     SimilarCuts,
     ThCovMatSpec,
 )
+from validphys.datafiles import legacy_to_new_map
 from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
 from validphys.gridvalues import LUMI_CHANNELS
 from validphys.loader import (
@@ -38,7 +39,7 @@
     PDFNotFound,
 )
 from validphys.paramfits.config import ParamfitsConfig
-from validphys.plotoptions import get_info
+from validphys.plotoptions.core import get_info
 import validphys.scalevariations
 from validphys.utils import freeze_args
@@ -364,15 +365,33 @@ def produce_fitpdfandbasis(self, fitpdf, basisfromfit):
 
     @element_of("dataset_inputs")
     def parse_dataset_input(self, dataset: Mapping):
-        """The mapping that corresponds to the dataset specifications in the
-        fit files"""
-        known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group"}
+        """The mapping that corresponds to the dataset specifications in the fit files
+
+        The mapping accepts the following keys:
+            dataset: str
+                name of the dataset to load
+            variant: str
+                variant of the dataset to load
+            cfac: list
+                list of cfactors to apply
+            frac: float
+                fraction of the data to consider for training purposes
+            weight: float
+                extra weight to give to the dataset
+            custom_group: str
+                custom group to apply to the dataset
+
+        Note that the `sys` key is deprecated and allowed only for old-format datasets.
+
+        Old-format commondata will be translated to the new version in this function.
+        """
+        accepted_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "variant"}
         try:
             name = dataset["dataset"]
             if not isinstance(name, str):
                 raise ConfigError(f"'dataset' must be a string, not {type(name)}")
             # Check whether this is an integrability or positivity dataset (in the only way we know?)
-            if name.startswith(("INTEG", "POS")):
+            if name.startswith(("NNPDF_INTEG", "NNPDF_POS", "POS", "INTEG")):
                 if name.startswith("INTEG"):
                     raise ConfigError("Please, use `integdataset` for integrability")
                 if name.startswith("POS"):
@@ -380,25 +399,47 @@
         except KeyError:
             raise ConfigError("'dataset' must be a mapping with " "'dataset' and 'sysnum'")
 
-        sysnum = dataset.get("sys")
+        # Warn about unknown keys in the `dataset_input` before any other check can fail
+        kdiff = dataset.keys() - accepted_keys
+        for k in kdiff:
+            # Abuse ConfigError to get the suggestions.
+            log.warning(
+                ConfigError(f"Key '{k}' in dataset_input not known ({name}).", k, accepted_keys)
+            )
+
         cfac = dataset.get("cfac", tuple())
+        custom_group = str(dataset.get("custom_group", "unset"))
+
         frac = dataset.get("frac", 1)
         if not isinstance(frac, numbers.Real):
-            raise ConfigError(f"'frac' must be a number, not '{frac}'")
+            raise ConfigError(f"'frac' must be a number, not '{frac}' ({name})")
         if frac < 0 or frac > 1:
-            raise ConfigError(f"'frac' must be between 0 and 1 not '{frac}'")
+            raise ConfigError(f"'frac' must be between 0 and 1 not '{frac}' ({name})")
+
         weight = dataset.get("weight", 1)
         if not isinstance(weight, numbers.Real):
-            raise ConfigError(f"'weight' must be a number, not '{weight}'")
+            raise ConfigError(f"'weight' must be a number, not '{weight}' ({name})")
         if weight < 0:
-            raise ConfigError(f"'weight' must be greater than zero not '{weight}'")
-        custom_group = str(dataset.get("custom_group", "unset"))
-        kdiff = dataset.keys() - known_keys
-        for k in kdiff:
-            # Abuse ConfigError to get the suggestions.
-            log.warning(ConfigError(f"Key '{k}' in dataset_input not known.", k, known_keys))
+            raise ConfigError(f"'weight' must be greater than zero not '{weight}' ({name})")
+
+        variant = dataset.get("variant")
+        sysnum = dataset.get("sys")
+
+        if variant is not None and sysnum is not None:
+            raise ConfigError(f"The 'variant' and 'sys' keys cannot be used together ({name})")
+
+        if variant is None:
+            # If a variant is not given this could be an old commondata, try to translate it!
+            name, variant = legacy_to_new_map(name, sysnum)
+
         return DataSetInput(
-            name=name, sys=sysnum, cfac=cfac, frac=frac, weight=weight, custom_group=custom_group
+            name=name,
+            sys=sysnum,
+            cfac=cfac,
+            frac=frac,
+            weight=weight,
+            custom_group=custom_group,
+            variant=variant,
         )
 
     def parse_use_fitcommondata(self, do_use: bool):
@@ -413,7 +454,11 @@ def produce_commondata(self, *, dataset_input, use_fitcommondata=False, fit=None
         sysnum = dataset_input.sys
         try:
             return self.loader.check_commondata(
-                setname=name, sysnum=sysnum, use_fitcommondata=use_fitcommondata, fit=fit
+                setname=name,
+                sysnum=sysnum,
+                use_fitcommondata=use_fitcommondata,
+                fit=fit,
+                variant=dataset_input.variant,
             )
         except DataNotFoundError as e:
             raise ConfigError(str(e), name, self.loader.available_datasets) from e
@@ -561,6 +606,7 @@ def produce_dataset(
         cfac = dataset_input.cfac
         frac = dataset_input.frac
         weight = dataset_input.weight
+        variant = dataset_input.variant
 
         try:
             ds = self.loader.check_dataset(
@@ -573,6 +619,7 @@
                 use_fitcommondata=use_fitcommondata,
                 fit=fit,
                 weight=weight,
+                variant=variant,
             )
         except DataNotFoundError as e:
             raise ConfigError(str(e), name, self.loader.available_datasets)
@@ -581,8 +628,6 @@
             raise ConfigError(e)
 
         if check_plotting:
-            from validphys.plotoptions import get_info
-
             # normalize=True should check for more stuff
             get_info(ds, normalize=True)
             if not ds.commondata.plotfiles:
@@ -962,6 +1007,8 @@ def _parse_lagrange_multiplier(self, kind, theoryid, setdict):
             lambda_key = "poslambda"
 
         try:
             name = setdict["dataset"]
+            # Swap a possibly old name with the new one
+            name, _ = legacy_to_new_map(name, None)
             maxlambda = float(setdict[lambda_key])
         except KeyError as e:
             raise ConfigError(bad_msg, setdict.keys(), e.args[0]) from e
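The practical effect of the translation layer added here is that old runcard entries keep working. A hedged sketch using `legacy_to_new_map` (implemented further down in this diff, in `validphys/datafiles/__init__.py`); the exact output depends on the `dataset_names.yml` mapping, and the name pair below is inferred from the dataset renames elsewhere in this patch:

```python
from validphys.datafiles import legacy_to_new_map

# An old-style runcard entry {dataset: CMSZDIFF12, sys: 10} is translated before
# loading; with the assumed mapping the old name resolves to the new name plus a
# sys-suffixed legacy variant.
name, variant = legacy_to_new_map("CMSZDIFF12", 10)
print(name, variant)  # expected: CMS_Z0_8TEV_PT-Y legacy_10

# A name that is not in the mapping is passed through untouched
print(legacy_to_new_map("CMS_Z0_8TEV_PT-Y"))  # ('CMS_Z0_8TEV_PT-Y', None)
```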
diff --git a/validphys2/src/validphys/convolution.py b/validphys2/src/validphys/convolution.py
index 859b9fdeef..4e51b2b3f6 100644
--- a/validphys2/src/validphys/convolution.py
+++ b/validphys2/src/validphys/convolution.py
@@ -42,7 +42,6 @@
 import numpy as np
 import pandas as pd
 
-from validphys.fkparser import load_fktable
 from validphys.pdfbases import evolution
 
 FK_FLAVOURS = evolution.to_known_elements(
@@ -116,10 +115,10 @@ def _predictions(dataset, pdf, fkfunc):
             "therefore produce predictions whose shape doesn't match the uncut "
             "commondata and is not supported."
         )
-    cuts = dataset.cuts.load()
+    cuts = dataset.cuts
     all_predictions = []
     for fk in dataset.fkspecs:
-        fk_w_cuts = load_fktable(fk).with_cuts(cuts)
+        fk_w_cuts = fk.load_with_cuts(cuts)
         all_predictions.append(fkfunc(fk_w_cuts, pdf))
     # Old fktables repeated values to make DEN and NUM sizes match in RATIO operations
     # pineappl tables instead just contain the one value used
diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py
index 2f4b3f3351..300b8bb7fa 100644
--- a/validphys2/src/validphys/core.py
+++ b/validphys2/src/validphys/core.py
@@ -1,8 +1,7 @@
-# -*- coding: utf-8 -*-
 """
 Core datastructures used in the validphys data model.
 """
-from dataclasses import dataclass
+import dataclasses
 import enum
 import functools
 import inspect
@@ -20,11 +19,7 @@
 # TODO: There is a bit of a circular dependency between filters.py and this.
 # Maybe move the cuts logic to its own module?
 from validphys import filters, lhaindex
-from validphys.commondataparser import (
-    get_plot_kinlabels,
-    parse_commondata,
-    peek_commondata_metadata,
-)
+from validphys.commondataparser import get_plot_kinlabels, load_commondata, peek_commondata_metadata
 from validphys.fkparser import load_fktable, parse_cfactor
 from validphys.hyperoptplot import HyperoptTrial
 from validphys.lhapdfset import LHAPDFSet
@@ -51,7 +46,7 @@ def __hash__(self):
 
     def __repr__(self):
         argvals = ', '.join('%s=%r' % vals for vals in zip(self.argnames(), self.comp_tuple))
-        return '%s(%s)' % (self.__class__.__qualname__, argvals)
+        return '{}({})'.format(self.__class__.__qualname__, argvals)
 
 
 class PDFDoesNotExist(Exception):
@@ -132,7 +127,7 @@ def info(self):
         if self._info is None:
             try:
                 self._info = lhaindex.parse_info(self.name)
-            except IOError as e:
+            except OSError as e:
                 raise PDFDoesNotExist(self.name) from e
         return self._info
@@ -211,21 +206,78 @@ def get_members(self):
 
 class CommonDataSpec(TupleComp):
-    def __init__(self, datafile, sysfile, plotfiles, name=None, metadata=None):
+    """Holds all the information necessary to load a commondata file and provides
+    methods to easily access it
+
+    Arguments
+    ---------
+    name: str
+        name of the commondata
+    metadata: ObservableMetaData
+        instance of ObservableMetaData holding all information about the dataset
+    legacy: bool
+        whether this is an old or new format metadata file
+
+    The ``datafile``, ``sysfile`` and ``plotfiles`` arguments are deprecated
+    and only to be used with ``legacy=True``
+    """
+
+    def __init__(self, name, metadata, legacy=False, datafile=None, sysfile=None, plotfiles=None):
+        self.legacy = legacy
+        self._metadata = metadata
+
+        # Some checks
+        if legacy:
+            if datafile is None or sysfile is None or plotfiles is None:
+                raise ValueError(
+                    "A legacy CommonDataSpec needs datafile, sysfile and plotfiles arguments"
+                )
+        else:
+            if sysfile is not None:
+                raise ValueError("A new CommonDataSpec doesn't take a sysfile input")
+            if plotfiles is not None:
+                raise ValueError("A new CommonDataSpec doesn't take a plotfiles input")
+
         self.datafile = datafile
         self.sysfile = sysfile
-        self.plotfiles = tuple(plotfiles)
-        self._name = name
-        self._metadata = metadata
-        super().__init__(datafile, sysfile, self.plotfiles)
+        if legacy:
+            self.plotfiles = 
tuple(plotfiles) + super().__init__(datafile, sysfile, self.plotfiles) + else: + self.plotfiles = False + super().__init__(name, self.metadata) + + def with_modified_data(self, central_data_file, uncertainties_file=None): + """Returns a copy of this instance with a new data file in the metadata""" + if self.legacy: + return self.__class__( + self.name, + self.metadata, + legacy=True, + datafile=central_data_file, + sysfile=self.sysfile, + plotfiles=self.plotfiles, + ) + + modified_args = {"data_central": central_data_file} + + if uncertainties_file is not None: + modified_args["data_uncertainties"] = [uncertainties_file] + + new_metadata = dataclasses.replace(self.metadata, **modified_args) + return self.__class__(self.name, new_metadata) @property def name(self): return self.metadata.name - @property + @functools.cached_property def nsys(self): - return self.metadata.nsys + if self.legacy: + return self.metadata.nsys + else: + cd = self.load() + return cd.nsys @property def ndata(self): @@ -237,44 +289,54 @@ def process_type(self): @property def metadata(self): - if self._metadata is None: + if self.legacy: self._metadata = peek_commondata_metadata(self.datafile) return self._metadata + @functools.cached_property + def legacy_name(self): + if self.legacy: + raise ValueError(f"This is already a legacy dataset: {self}") + return self.load().legacy_name + + @property + def theory_metadata(self): + if self.legacy: + return None + return self.metadata.theory + def __str__(self): return self.name def __iter__(self): return iter((self.datafile, self.sysfile, self.plotfiles)) - @functools.lru_cache() def load(self): - return parse_commondata(self.datafile, self.sysfile, self.name) - - def load_commondata_instance(self): """ load a validphys.core.CommonDataSpec to validphys.core.CommonData """ - from validphys.commondataparser import load_commondata - return load_commondata(self) @property def plot_kinlabels(self): - return get_plot_kinlabels(self) + if self.legacy: + return get_plot_kinlabels(self) + else: + return self.metadata.kinlabels class DataSetInput(TupleComp): """Represents whatever the user enters in the YAML to specify a dataset.""" - def __init__(self, *, name, sys, cfac, frac, weight, custom_group): + def __init__(self, *, name, sys, cfac, frac, weight, custom_group, variant): self.name = name self.sys = sys self.cfac = cfac self.frac = frac self.weight = weight self.custom_group = custom_group + self.variant = variant super().__init__(name, sys, cfac, frac, weight, custom_group) def __str__(self): @@ -362,7 +424,6 @@ def __init__(self, inputs, threshold): @functools.lru_cache() def load(self): # TODO: Update this when a suitable interace becomes available - from validphys.commondataparser import load_commondata from validphys.convolution import central_predictions from validphys.covmats import covmat_from_systematics @@ -405,10 +466,15 @@ def __init__(self, *, name, commondata, fkspecs, thspec, cuts, frac=1, op=None, self.cuts = cuts self.frac = frac - # Do this way (instead of setting op='NULL' in the signature) - # so we don't have to know the default everywhere + # If OP is None, check whether the commondata is setting an operation + # TODO: eventually the operation will _always_ be set from the commondata, but for legacy + # compatibility it will be also controllable as an input argument if op is None: - op = 'NULL' + if commondata.theory_metadata is None: + op = 'NULL' + else: + op = commondata.theory_metadata.operation + self.op = op self.weight = weight @@ -473,10 +539,10 @@ 
def __init__(self, fkpath, cfactors, metadata=None): self.fkpath = fkpath self.metadata = metadata - # For new theories, add also the target_dataset so that we don't reuse fktables - # Ideally this won't be necessary in the future and we will be able to reutilize fktables. + # For non-legacy theory, add the metadata since it defines how the theory is to be loaded + # and thus, it should also define the hash of the class if not self.legacy: - super().__init__(fkpath, cfactors, self.metadata.get("target_dataset")) + super().__init__(fkpath, cfactors, self.metadata) else: super().__init__(fkpath, cfactors) @@ -557,7 +623,7 @@ def load_commondata_instance(self): """ commodata_list = [] for dataset in self.datasets: - cd = dataset.commondata.load_commondata_instance() + cd = dataset.commondata.load() if dataset.cuts is None: commodata_list.append(cd) else: @@ -661,7 +727,7 @@ def get_all_trials(self, base_params=None): """ all_trials = [] for trial_file in self.tries_files.values(): - with open(trial_file, "r") as tf: + with open(trial_file) as tf: run_trials = [] for trial in json.load(tf): trial = HyperoptTrial(trial, base_params=base_params, linked_trials=run_trials) @@ -694,7 +760,7 @@ def sample_trials(self, n=None, base_params=None, sigma=4.0): return np.random.choice(all_trials, replace=False, size=n, p=weights) -@dataclass +@dataclasses.dataclass class TheoryIDSpec: id: int path: Path @@ -858,4 +924,4 @@ def as_pair(self): return self.label, self.indexes def __str__(self): - return '%s: %s' % (self.label, self.indexes) + return '{}: {}'.format(self.label, self.indexes) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 8f442a9a35..b3e8c21978 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -1,15 +1,19 @@ """ Data containers backed by Python managed memory (Numpy arrays and Pandas -dataframes). +dataframes). 
""" import dataclasses +import logging +from typing import Optional import numpy as np import pandas as pd -from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file +from reportengine.compat import yaml +from validphys.utils import generate_path_filtered_data KIN_NAMES = ["kin1", "kin2", "kin3"] +log = logging.getLogger(__name__) @dataclasses.dataclass(eq=False) @@ -115,7 +119,12 @@ def with_cuts(self, cuts): if cuts is None or self.protected: return self newndata = len(cuts) - newsigma = self.sigma.loc[cuts] + try: + newsigma = self.sigma.loc[cuts] + except KeyError as e: + # This will be an ugly erorr msg, but it should be scary anyway + log.error(f"Problem applying cuts to {self.metadata}") + raise e return dataclasses.replace(self, ndata=newndata, sigma=newsigma) @property @@ -245,12 +254,19 @@ class CommonData: nsys: int commondata_table: pd.DataFrame = dataclasses.field(repr=False) systype_table: pd.DataFrame = dataclasses.field(repr=False) - systematics_table: pd.DataFrame = dataclasses.field(init=None, repr=False) + legacy: bool + systematics_table: Optional[pd.DataFrame] = dataclasses.field(init=None, repr=False) + legacy_name: Optional[str] = None + kin_variables: Optional[list] = None def __post_init__(self): self.systematics_table = self.commondata_table.drop( columns=["process", "data", "stat"] + KIN_NAMES ) + if self.legacy_name is None: + self.legacy_name = self.setname + # TODO: set for now commondataproc as a string as well + self.commondataproc = str(self.commondataproc) def with_cuts(self, cuts): """A method to return a CommonData object where @@ -314,13 +330,15 @@ def multiplicative_errors(self): in a percentage format, with SKIP uncertainties removed. """ - mult_systype = self.systype_table[self.systype_table["type"] == "MULT"] - # NOTE: Index with list here so that return is always a DataFrame, even - # if N_sys = 1 (else a Series could be returned) - mult_table = self.systematics_table.loc[:, ["MULT"]] - # Minus 1 because iloc starts from 0, while the systype counting starts - # from 1. - mult_table = mult_table.iloc[:, mult_systype.index - 1] + mult_systype = self.systype_table[self.systype_table["treatment"] == "MULT"] + mult_table = self.systematics_table.filter(like="MULT") + + if self.legacy: + # Needed in legacy because every uncertainty appears as both mult and add + # so it is necessary to select the uncertainties that are to be consireded as MULT/ADD + # Minus 1 because iloc starts from 0, while the systype counting starts from 1 + mult_table = mult_table.iloc[:, mult_systype.index - 1] + mult_table.columns = mult_systype["name"].to_numpy() return mult_table.loc[:, mult_table.columns != "SKIP"] @@ -331,13 +349,13 @@ def additive_errors(self): removed. """ - add_systype = self.systype_table[self.systype_table["type"] == "ADD"] - # NOTE: Index with list here so that return is always a DataFrame, even - # if N_sys = 1 (else a Series could be returned) - add_table = self.systematics_table.loc[:, ["ADD"]] - # Minus 1 because iloc starts from 0, while the systype counting starts - # from 1. 
@@ -331,13 +349,13 @@ def additive_errors(self):
         removed.
 
         """
-        add_systype = self.systype_table[self.systype_table["type"] == "ADD"]
-        # NOTE: Index with list here so that return is always a DataFrame, even
-        # if N_sys = 1 (else a Series could be returned)
-        add_table = self.systematics_table.loc[:, ["ADD"]]
-        # Minus 1 because iloc starts from 0, while the systype counting starts
-        # from 1.
-        add_table = add_table.iloc[:, add_systype.index - 1]
+        add_systype = self.systype_table[self.systype_table["treatment"] == "ADD"]
+        add_table = self.systematics_table.filter(like="ADD")
+
+        if self.legacy:
+            # Minus 1 because iloc starts from 0, while the systype counting starts from 1
+            add_table = add_table.iloc[:, add_systype.index - 1]
+
         add_table.columns = add_systype["name"].to_numpy()
         return add_table.loc[:, add_table.columns != "SKIP"]
 
@@ -370,17 +388,45 @@ def systematic_errors(self, central_values=None):
             converted_mult_errors = self.multiplicative_errors * central_values[:, np.newaxis] / 100
             return pd.concat((self.additive_errors, converted_mult_errors), axis=1)
 
-    def export(self, path):
-        """Export the data, and error types
-        Use the same format as libNNPDF:
-
-        - A DATA_.dat file with the dataframe of accepted points
-        - A systypes/STYPES_.dat file with the error types
+    def export_data(self, buffer):
+        """Exports the central data defined by this commondata instance to the given buffer"""
+        ret = {"data_central": self.central_values.tolist()}
+        yaml.safe_dump(ret, buffer)
+
+    def export_uncertainties(self, buffer):
+        """Exports the uncertainties defined by this commondata instance to the given buffer"""
+        definitions = {}
+        for idx, row in self.systype_table.iterrows():
+            definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]}
+
+        bins = []
+        for idx, row in self.systematic_errors().iterrows():
+            tmp = {"stat": float(self.stat_errors[idx])}
+            # Hope things come in the right order...
+            for key_name, val in zip(definitions, row):
+                tmp[key_name] = float(val)
+
+            bins.append(tmp)
+
+        definitions["stat"] = {
+            "description": "Uncorrelated statistical uncertainties",
+            "treatment": "ADD",
+            "type": "UNCORR",
+        }
+
+        ret = {"definitions": definitions, "bins": bins}
+        yaml.safe_dump(ret, buffer)
+
+    def export(self, folder_path):
+        """Wrapper around export_data and export_uncertainties
+        to write both uncertainties and data after filtering to a given folder
         """
-
-        dat_path = path / f"DATA_{self.setname}.dat"
-        sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat"
-        sys_path.parent.mkdir(exist_ok=True)
-
-        write_systype_to_file(self, sys_path)
-        write_commondata_to_file(self, dat_path)
+        folder_path.mkdir(exist_ok=True)
+        # Get the same names as one would use for the filters
+        data_path, unc_path = generate_path_filtered_data(folder_path, self.setname)
+        # And attach it to the given folder
+        data_path = folder_path / data_path.name
+        unc_path = folder_path / unc_path.name
+        # Export data and uncertainties
+        self.export_data(data_path.open("w", encoding="utf-8"))
+        self.export_uncertainties(unc_path.open("w", encoding="utf-8"))
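For orientation, the mapping dumped by `export_uncertainties` has the shape sketched below (field values are illustrative placeholders, not taken from any real dataset):

```python
# Illustrative structure of the `ret` mapping written by export_uncertainties
ret = {
    "definitions": {
        "sys_1": {"treatment": "ADD", "type": "UNCORR"},
        "sys_2": {"treatment": "MULT", "type": "CORR"},
        "stat": {
            "description": "Uncorrelated statistical uncertainties",
            "treatment": "ADD",
            "type": "UNCORR",
        },
    },
    # One entry per data point, pairing the stat error with each systematic
    "bins": [
        {"stat": 0.01, "sys_1": 0.02, "sys_2": 1.5},
        {"stat": 0.03, "sys_1": 0.01, "sys_2": 0.9},
    ],
}
```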
+ rule: "pT_t<450" -- dataset: ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM +- dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YT-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "yQ<2.05" + rule: "y_t<2.05" -- dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM +- dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "yQQ<1.90" + rule: "y_ttBar<1.90" -- dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTMNORM +- dataset: ATLAS_TTBAR_8TEV_LJ_DIF_MTTBAR-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "MQQ<1350" + rule: "m_ttBar<1350" -- dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM +- dataset: ATLAS_TTBAR_8TEV_2L_DIF_YTTBAR-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "yQQ<2.00" + rule: "y_ttBar<2.00" -- dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTMNORM +- dataset: ATLAS_TTBAR_8TEV_2L_DIF_MTTBAR-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "MQQ<1000" + rule: "m_ttBar<1000" -- dataset: CMSTOPDIFF8TEVTRAPNORM +- dataset: CMS_TTBAR_8TEV_LJ_DIF_YT-NORM reason: | - We remove the last bin of the normalised distribution because it is + We remove the last bin of the LHCB_DY_8TEV_MUON_Ynormalised distribution because it is linearly dependent on the others - rule: "yQ<2.01" + rule: "y_t<2.01" -- dataset: CMSTOPDIFF8TEVTTRAPNORM +- dataset: CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM reason: | We remove the last bin of the normalised distribution because it is linearly dependent on the others - rule: "yQQ<1.82" - -- dataset: ATLAS1JET11 - reason: | - We keep only the first rapidity bin since the chi2 to the whole set of - rapidity bins is too large (known problems with correlation matrix). - This cut will be removed in the new bunch of fits that we are going to do - for the NNLO jet project. - rule: eta < 0.3 + rule: "y_ttBar<1.82" -- dataset: CMS_1JET_8TEV +- dataset: CMS_1JET_8TEV_PTY reason: | We keep only the bins with pTjet>74 GeV because fixed-order theory does not provide a good description of the data for smalle values of the jet transverse momentum. - rule: "p_T2 >= 5476" + rule: "pT >= 74" -- dataset: LHCBWZMU8TEV +- dataset: LHCB_DY_8TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. # Only evaluate rule if PTO matches PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCBWMU8TEV +- dataset: LHCB_WPWM_8TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. 
# Only evaluate rule if PTO matches PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCBZMU8TEV +- dataset: LHCB_Z0_8TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. # Only evaluate rule if PTO matches PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCBWZMU7TEV +- dataset: LHCB_DY_7TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCBWMU7TEV +- dataset: LHCB_WPWM_7TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCBZMU7TEV +- dataset: LHCB_Z0_7TEV_MUON_Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. PTO: NNLO+ - rule: "etay >= 2.25" + rule: "y >= 2.25" -- dataset: LHCB_Z_13TEV_DIELECTRON +- dataset: LHCB_Z0_13TEV_DIELECTRON-Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. PTO: NNLO+ - rule: "etay >= 2.20" + rule: "y >= 2.20" -- dataset: LHCB_Z_13TEV_DIMUON +- dataset: LHCB_Z0_13TEV_DIMUON-Y reason: | The first two bins in rapidity have unnaturally large K-factors which we don't understand, and theory calculations are unstable here, so we remove these points at NNLO. PTO: NNLO+ - rule: "etay >= 2.20" + rule: "y >= 2.20" -- dataset: D0WMASY +- dataset: D0_WPWM_1P96TEV_ASY reason: | Calculation of the NNLO K-factor at the precision required for data points for which the asymmetry is small is unfeasible. So we can only fit points @@ -142,68 +134,49 @@ PTO: NNLO+ rule: "fabs(central_value) >= 0.03" -- dataset: D0WEASY - reason: | - Calculation of the NNLO K-factor at the precision required for data points - for which the asymmetry is small is unfeasible. So we can only fit points - at NNLO for which the value of the asymmetry is not too small. - PTO: NNLO+ - rule: "fabs(central_value) >= 0.03" -- dataset: ATLAS_WP_JET_8TEV_PT +- dataset: ATLAS_WJ_JET_8TEV_WP-PT reason: Avoid the region where small-pT resummation effects become important. rule: "p_T >= 30" -- dataset: ATLAS_WM_JET_8TEV_PT +- dataset: ATLAS_WJ_JET_8TEV_WM-PT reason: Avoid the region where small-pT resummation effects become important. rule: "p_T >= 30" -- dataset: ATLASZPT7TEV - reason: Avoid the region where resummation effects become important. - rule: "p_T2 >= 30**2" - -- dataset: ATLASZPT7TEV - reason: Avoid the region where EWK corrections are important. - rule: "p_T2 <= 500**2" - -- dataset: ATLASZPT8TEVMDIST +- dataset: ATLAS_Z0J_8TEV_PT-M reason: Avoid the region where resummation effects become important. rule: "p_T >= 30" -- dataset: ATLASZPT8TEVYDIST +- dataset: ATLAS_Z0J_8TEV_PT-Y reason: Avoid the region where resummation effects become important. rule: "p_T2 >= 30**2" -- dataset: ATLASZPT8TEVYDIST +- dataset: ATLAS_Z0J_8TEV_PT-Y reason: Avoid the region where EWK corrections are important. 
rule: "p_T2 <= 150**2" -- dataset: CMSZDIFF12 +- dataset: CMS_Z0_8TEV_PT-Y reason: Avoid the region where resummation effects become important. rule: "p_T2 >= 30**2" -- dataset: CMSZDIFF12 +- dataset: CMS_Z0_8TEV_PT-Y reason: Avoid the region where EWK corrections are important. rule: "p_T2 <= 170**2" -- dataset: CMSZDIFF12 +- dataset: CMS_Z0_8TEV_PT-Y reason: | removes the last rapidity bin, where we cannot provide a decent description due to unknown reasons. rule: "etay <= 1.6" -- dataset: ATLASWPT31PB - reason: Avoid the region where EWK corrections are important. - rule: "p_T > 30" - -- dataset: ATLAS_DY_2D_8TEV_LOWMASS +- dataset: ATLAS_DY_8TEV_LOWMASS_M-Y reason: Avoid overlap with the high mass ATLAS2DDY8TEV dataset. local_variables: M: sqrt(M2) max_M: 116.0 rule: M <= max_M - -- dataset: CMSDY2D11 + +- dataset: CMS_Z0_7TEV_DIMUON_2D reason: | Removes data points for which the NNLO K-factors are very large and thus the NLO calculation is unreliable. @@ -213,70 +186,39 @@ max_M: 200.0 rule: M <= max_M -- dataset: CMSDY2D11 +- dataset: CMS_Z0_7TEV_DIMUON_2D reason: Remove data points for which electroweak corrections are large. PTO: NNLO- local_variables: M: sqrt(M2) min_M: 30.0 max_rapidity: 2.2 - rule: M >= min_M and etay <= max_rapidity + rule: M >= min_M and y <= max_rapidity -- dataset: CMSDY2D11 +- dataset: CMS_Z0_7TEV_DIMUON_2D reason: Remove data points for which electroweak corrections are large. PTO: NNLO+ local_variables: M: sqrt(M2) max_rapidity: 2.2 max_M: 200.0 - rule: M <= max_M and etay <= max_rapidity - -- dataset: CMSDY2D12 - reason: | - Removes data points for which the NNLO K-factors are very large and - thus the NLO calculation is unreliable. - PTO: NNLO- - local_variables: - M: sqrt(M2) - max_M: 200.0 - rule: M <= max_M + rule: M <= max_M and y <= max_rapidity -- dataset: CMSDY2D12 - reason: Remove data points for which electroweak corrections are large. - PTO: NNLO- - local_variables: - M: sqrt(M2) - min_M: 30.0 - max_rapidity: 2.2 - rule: M >= min_M and etay <= max_rapidity -- dataset: CMSDY2D12 - reason: Remove data points for which electroweak corrections are large. - PTO: NNLO+ - local_variables: - M: sqrt(M2) - max_rapidity: 2.2 - max_M: 200.0 - rule: M <= max_M and etay <= max_rapidity -- dataset: ATLASZHIGHMASS49FB +- dataset: ATLAS_Z0_7TEV_49FB_HIMASS reason: Avoid the region where resummation effects become important. local_variables: max_M: 200.0 rule: M_ll <= max_M -# - dataset: LHCBLOWMASS37PB -# rule: "pT <= maxCMSDY2Dminv" - -# - dataset: ATLASLOMASSDY11 -# rule: "False if (pto in (0, 1) and idat < 6) else True" -- dataset: ATLASLOMASSDY11EXT +- dataset: ATLAS_Z0_7TEV_LOMASS_M reason: Removes data points for which NNLO K-factors are large. PTO: NNLO- rule: "idat >= 2" -- dataset: DYE886P +- dataset: DYE866_Z0_800GEV_PXSEC reason: | Remove data points for which the fixed-order perturbative expansion is not reliable since resummation effects are large. A justification of these cuts @@ -324,7 +266,7 @@ maxY: 0.663 rule: "tau <= maxTau and fabs(y/ymax) <= maxY" -- dataset: DYE605_dw_ite +- dataset: DYE605_Z0_38P8GEV_DW_PXSEC reason: | Remove data points for which the fixed-order perturbative expansion is not reliable since resummation effects are large. 
A justification of these cuts @@ -354,19 +296,6 @@ w2: Q2 * (1 - x) / x rule: "Q2 > q2min and w2 > w2min" -- dataset: EMCF2P - reason: | - The inclusive EMC structure function measurements where known to be - affected with a problem in the experimental measurement which rendered them - unreliable for x < 0.1. - rule: "x > 0.1" - -- dataset: EMCF2D - reason: | - The inclusive EMC structure function measurements where known to be - affected with a problem in the experimental measurement which rendered them - unreliable for x < 0.1. - rule: "x > 0.1" - process_type: DIS_NCP_CH reason: | diff --git a/validphys2/src/validphys/datafiles/__init__.py b/validphys2/src/validphys/datafiles/__init__.py index f80600b6cb..353499060f 100644 --- a/validphys2/src/validphys/datafiles/__init__.py +++ b/validphys2/src/validphys/datafiles/__init__.py @@ -1,5 +1,64 @@ +from functools import lru_cache import pathlib +from reportengine.compat import yaml + path_vpdata = pathlib.Path(__file__).parent -path_commondata = pathlib.Path(__file__).with_name('commondata') -path_theorydb = pathlib.Path(__file__).with_name('theory.db') +path_commondata = path_vpdata / "new_commondata" + +# VP should not have access to this file, only to the products +_path_legacy_mapping = path_commondata / "dataset_names.yml" +legacy_to_new_mapping = yaml.YAML().load(_path_legacy_mapping) + + +@lru_cache +def legacy_to_new_map(dataset_name, sys=None): + """Find the new dataset name and variant corresponding to an old dataset + and systematics choice""" + if dataset_name not in legacy_to_new_mapping: + return dataset_name, None + + new_name = legacy_to_new_mapping[dataset_name] + if isinstance(new_name, str): + if sys is not None: + raise KeyError( + f"I cannot translate the combination of {dataset_name} and sys: {sys}. Please report this." + ) + return new_name, None + + variant = new_name.get("variant") + new_name = new_name["dataset"] + if sys is not None: + if variant is None: + raise KeyError( + f"I cannot translate the combination of {dataset_name} and sys: {sys}. Please report this." 
+            )
+        variant += f"_{sys}"
+
+    return new_name, variant
+
+
+@lru_cache
+def new_to_legacy_map(dataset_name, variant_used):
+    """Find the old (legacy) dataset name corresponding to a new name and variant"""
+    # It is not possible to simply reverse the dictionary because
+    # two old datasets can be mapped to the same new one
+
+    possible_match = None
+    for old_name, new_name in legacy_to_new_mapping.items():
+        variant = None
+        if not isinstance(new_name, str):
+            variant = new_name.get("variant")
+            new_name = new_name["dataset"]
+
+        if new_name == dataset_name:
+            if variant_used == variant:
+                return old_name
+            # Now, for legacy variants we might want to match (sys,)
+            # so accept anything that starts with `legacy_`:
+            # variant `legacy_10` will match `legacy` in the dictionary,
+            # but if an exact match is found before, the search ends
+            if variant_used is not None and variant_used.startswith("legacy_"):
+                possible_match = old_name
+
+    return possible_match
diff --git a/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_D_DW/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_D_DW/metadata.yaml
index 7665248c56..e305809a2e 100644
--- a/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_D_DW/metadata.yaml
+++ b/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_D_DW/metadata.yaml
@@ -62,7 +62,9 @@ implemented_observables:
       conversion_factor: 1.0
       operation: 'NULL'
       FK_tables:
-      - - BCDMS_NC_EM_D_F2
+      - - BCDMS_NC_100GEV_EM_D_F2
+        - BCDMS_NC_200GEV_EM_D_F2
+        - BCDMS_NC_280GEV_EM_D_F2
     data_uncertainties: []
     variants:
       legacy:
diff --git a/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_P_DW/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_P_DW/metadata.yaml
index 305011a64c..3483fa2ce7 100644
--- a/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_P_DW/metadata.yaml
+++ b/validphys2/src/validphys/datafiles/new_commondata/BCDMS_NC_NOTFIXED_P_DW/metadata.yaml
@@ -62,7 +62,10 @@ implemented_observables:
       conversion_factor: 1.0
       operation: 'NULL'
       FK_tables:
-      - - BCDMS_NC_EM_P_F2
+      - - BCDMS_NC_100GEV_EM_P_F2
+        - BCDMS_NC_120GEV_EM_P_F2
+        - BCDMS_NC_200GEV_EM_P_F2
+        - BCDMS_NC_280GEV_EM_P_F2
     data_uncertainties: []
     variants:
       legacy:
diff --git a/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/data_legacy_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/data_legacy_ZRAP.yaml
similarity index 100%
rename from validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/data_legacy_ZRAP.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/data_legacy_ZRAP.yaml
diff --git a/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/kinematics_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/kinematics_ZRAP.yaml
similarity index 100%
rename from validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/kinematics_ZRAP.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/kinematics_ZRAP.yaml
diff --git a/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/metadata.yaml
similarity index 98%
rename from validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/metadata.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/metadata.yaml
index a397a7394a..c50685ce79 100644
---
a/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/metadata.yaml +++ b/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/metadata.yaml @@ -1,4 +1,4 @@ -setname: CDF_Z0_1P96GEV +setname: CDF_Z0_1P96TEV version: 1 version_comment: Port of old commondata nnpdf_metadata: diff --git a/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/uncertainties_legacy_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/uncertainties_legacy_ZRAP.yaml similarity index 100% rename from validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96GEV/uncertainties_legacy_ZRAP.yaml rename to validphys2/src/validphys/datafiles/new_commondata/CDF_Z0_1P96TEV/uncertainties_legacy_ZRAP.yaml diff --git a/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/kinematics.yaml b/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/kinematics.yaml index ca9e9203d6..16f29959a3 100644 --- a/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/kinematics.yaml +++ b/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/kinematics.yaml @@ -1,2869 +1,2869 @@ bins: -- y: - min: 0.0 - mid: 0.25 - max: 0.5 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 - max: 0.5 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 - sqrts: - min: null mid: 8000.0 - max: null -- y: - min: 0.0 - mid: 0.25 + min: null + y: max: 0.5 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 0.25 + min: 0.0 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 0.25 + min: 0.0 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 0.25 + min: 0.0 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 0.25 + min: 0.0 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 0.25 + min: 0.0 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 0.25 + min: 0.0 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 0.25 + min: 0.0 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 5476.0 - mid: 6241.0 - max: 7056.0 + mid: 0.25 + min: 0.0 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 7056.0 - mid: 8190.25 - max: 9409.0 + mid: 0.25 + min: 0.0 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + 
y: max: 0.5 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 0.25 + min: 0.0 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 0.25 + min: 0.0 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 0.25 + min: 0.0 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 0.25 + min: 0.0 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 0.25 + min: 0.0 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 0.25 + min: 0.0 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 0.25 + min: 0.0 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 0.25 + min: 0.0 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 0.25 + min: 0.0 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 0.25 + min: 0.0 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 0.25 + min: 0.0 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 0.25 + min: 0.0 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 0.25 + min: 0.0 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + mid: 0.25 + min: 0.0 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 0.25 + min: 0.0 +- pT: + max: 468.0 + mid: 449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - 
min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 0.25 + min: 0.0 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 300304.0 - mid: 324900.0 - max: 350464.0 + mid: 0.25 + min: 0.0 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 350464.0 - mid: 378225.0 - max: 407044.0 + mid: 0.25 + min: 0.0 +- pT: + max: 592.0 + mid: 570.0 + min: 548.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 407044.0 - mid: 438244.0 - max: 470596.0 + mid: 0.25 + min: 0.0 +- pT: + max: 638.0 + mid: 615.0 + min: 592.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 470596.0 - mid: 506232.25 - max: 543169.0 + mid: 0.25 + min: 0.0 +- pT: + max: 686.0 + mid: 662.0 + min: 638.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 543169.0 - mid: 582932.25 - max: 624100.0 + mid: 0.25 + min: 0.0 +- pT: + max: 737.0 + mid: 711.5 + min: 686.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 624100.0 - mid: 669124.0 - max: 715716.0 + mid: 0.25 + min: 0.0 +- pT: + max: 790.0 + mid: 763.5 + min: 737.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 715716.0 - mid: 766500.25 - max: 819025.0 + mid: 0.25 + min: 0.0 +- pT: + max: 846.0 + mid: 818.0 + min: 790.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 819025.0 - mid: 876096.0 - max: 935089.0 + mid: 0.25 + min: 0.0 +- pT: + max: 905.0 + mid: 875.5 + min: 846.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 935089.0 - mid: 999000.25 - max: 1065024.0 + mid: 0.25 + min: 0.0 +- pT: + max: 967.0 + mid: 936.0 + min: 905.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1065024.0 - mid: 1137422.25 - max: 1212201.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1032.0 + mid: 999.5 + min: 967.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1212201.0 - mid: 1291632.25 - max: 1373584.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1101.0 + mid: 1066.5 + min: 1032.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1373584.0 - mid: 1464100.0 - max: 1557504.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1172.0 + mid: 1136.5 + min: 1101.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1557504.0 - mid: 1657656.25 - max: 1760929.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1248.0 + mid: 1210.0 + min: 1172.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1760929.0 - mid: 1872792.25 - max: 1988100.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1327.0 + mid: 1287.5 + min: 1248.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 
8000.0 + min: null + y: max: 0.5 - p_T2: - min: 1988100.0 - mid: 2112662.25 - max: 2241009.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1410.0 + mid: 1368.5 + min: 1327.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 2241009.0 - mid: 2379306.25 - max: 2521744.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1497.0 + mid: 1453.5 + min: 1410.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 2521744.0 - mid: 2842596.0 - max: 3182656.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1588.0 + mid: 1542.5 + min: 1497.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 3182656.0 - mid: 3802500.0 - max: 4477456.0 + mid: 0.25 + min: 0.0 +- pT: + max: 1784.0 + mid: 1686.0 + min: 1588.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.0 - mid: 0.25 + mid: 8000.0 + min: null + y: max: 0.5 - p_T2: - min: 4477456.0 - mid: 5326864.0 - max: 6250000.0 + mid: 0.25 + min: 0.0 +- pT: + max: 2116.0 + mid: 1950.0 + min: 1784.0 sqrts: + max: null + mid: 8000.0 min: null + y: + max: 0.5 + mid: 0.25 + min: 0.0 +- pT: + max: 2500.0 + mid: 2308.0 + min: 2116.0 + sqrts: + max: null mid: 8000.0 + min: null + y: + max: 0.5 + mid: 0.25 + min: 0.0 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 + sqrts: max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 + mid: 0.75 + min: 0.5 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 + mid: 0.75 + min: 0.5 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 0.75 + min: 0.5 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 0.75 + min: 0.5 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 0.75 + min: 0.5 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 0.75 + min: 0.5 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 0.75 + min: 0.5 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 0.75 + min: 0.5 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 0.75 + min: 0.5 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 5476.0 - 
mid: 6241.0 - max: 7056.0 + mid: 0.75 + min: 0.5 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 7056.0 - mid: 8190.25 - max: 9409.0 + mid: 0.75 + min: 0.5 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 0.75 + min: 0.5 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 0.75 + min: 0.5 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 0.75 + min: 0.5 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 0.75 + min: 0.5 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 0.75 + min: 0.5 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 0.75 + min: 0.5 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 0.75 + min: 0.5 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 0.75 + min: 0.5 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 0.75 + min: 0.5 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 0.75 + min: 0.5 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 0.75 + min: 0.5 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 0.75 + min: 0.5 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 0.75 + min: 0.5 +- pT: + max: 468.0 + mid: 449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + 
mid: 0.75 + min: 0.5 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 0.75 + min: 0.5 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 0.75 + min: 0.5 +- pT: + max: 592.0 + mid: 570.0 + min: 548.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 300304.0 - mid: 324900.0 - max: 350464.0 + mid: 0.75 + min: 0.5 +- pT: + max: 638.0 + mid: 615.0 + min: 592.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 350464.0 - mid: 378225.0 - max: 407044.0 + mid: 0.75 + min: 0.5 +- pT: + max: 686.0 + mid: 662.0 + min: 638.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 407044.0 - mid: 438244.0 - max: 470596.0 + mid: 0.75 + min: 0.5 +- pT: + max: 737.0 + mid: 711.5 + min: 686.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 470596.0 - mid: 506232.25 - max: 543169.0 + mid: 0.75 + min: 0.5 +- pT: + max: 790.0 + mid: 763.5 + min: 737.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 543169.0 - mid: 582932.25 - max: 624100.0 + mid: 0.75 + min: 0.5 +- pT: + max: 846.0 + mid: 818.0 + min: 790.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 624100.0 - mid: 669124.0 - max: 715716.0 + mid: 0.75 + min: 0.5 +- pT: + max: 905.0 + mid: 875.5 + min: 846.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 715716.0 - mid: 766500.25 - max: 819025.0 + mid: 0.75 + min: 0.5 +- pT: + max: 967.0 + mid: 936.0 + min: 905.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 819025.0 - mid: 876096.0 - max: 935089.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1032.0 + mid: 999.5 + min: 967.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 935089.0 - mid: 999000.25 - max: 1065024.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1101.0 + mid: 1066.5 + min: 1032.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1065024.0 - mid: 1137422.25 - max: 1212201.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1172.0 + mid: 1136.5 + min: 1101.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1212201.0 - mid: 1291632.25 - max: 1373584.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1248.0 + mid: 1210.0 + min: 1172.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1373584.0 - mid: 1464100.0 - max: 1557504.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1327.0 + mid: 1287.5 + min: 1248.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1557504.0 - 
mid: 1657656.25 - max: 1760929.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1410.0 + mid: 1368.5 + min: 1327.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1760929.0 - mid: 1872792.25 - max: 1988100.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1497.0 + mid: 1453.5 + min: 1410.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 1988100.0 - mid: 2112662.25 - max: 2241009.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1588.0 + mid: 1542.5 + min: 1497.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 2241009.0 - mid: 2379306.25 - max: 2521744.0 + mid: 0.75 + min: 0.5 +- pT: + max: 1784.0 + mid: 1686.0 + min: 1588.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 2521744.0 - mid: 2842596.0 - max: 3182656.0 + mid: 0.75 + min: 0.5 +- pT: + max: 2116.0 + mid: 1950.0 + min: 1784.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 3182656.0 - mid: 3802500.0 - max: 4477456.0 + mid: 0.75 + min: 0.5 +- pT: + max: 2500.0 + mid: 2308.0 + min: 2116.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 0.5 - mid: 0.75 + mid: 8000.0 + min: null + y: max: 1.0 - p_T2: - min: 4477456.0 - mid: 5326864.0 - max: 6250000.0 + mid: 0.75 + min: 0.5 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 + mid: 1.25 + min: 1.0 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 + mid: 1.25 + min: 1.0 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 1.25 + min: 1.0 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 1.25 + min: 1.0 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 1.25 + min: 1.0 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 1.25 + min: 1.0 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 1.25 + min: 1.0 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 1.25 + min: 1.0 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 1.25 + 
min: 1.0 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 5476.0 - mid: 6241.0 - max: 7056.0 + mid: 1.25 + min: 1.0 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 7056.0 - mid: 8190.25 - max: 9409.0 + mid: 1.25 + min: 1.0 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 1.25 + min: 1.0 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 1.25 + min: 1.0 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 1.25 + min: 1.0 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 1.25 + min: 1.0 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 1.25 + min: 1.0 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 1.25 + min: 1.0 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 1.25 + min: 1.0 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 1.25 + min: 1.0 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 1.25 + min: 1.0 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 1.25 + min: 1.0 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 1.25 + min: 1.0 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 1.25 + min: 1.0 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 1.25 + min: 1.0 +- pT: + max: 468.0 + mid: 
449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + mid: 1.25 + min: 1.0 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 1.25 + min: 1.0 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 1.25 + min: 1.0 +- pT: + max: 592.0 + mid: 570.0 + min: 548.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 300304.0 - mid: 324900.0 - max: 350464.0 + mid: 1.25 + min: 1.0 +- pT: + max: 638.0 + mid: 615.0 + min: 592.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 350464.0 - mid: 378225.0 - max: 407044.0 + mid: 1.25 + min: 1.0 +- pT: + max: 686.0 + mid: 662.0 + min: 638.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 407044.0 - mid: 438244.0 - max: 470596.0 + mid: 1.25 + min: 1.0 +- pT: + max: 737.0 + mid: 711.5 + min: 686.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 470596.0 - mid: 506232.25 - max: 543169.0 + mid: 1.25 + min: 1.0 +- pT: + max: 790.0 + mid: 763.5 + min: 737.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 543169.0 - mid: 582932.25 - max: 624100.0 + mid: 1.25 + min: 1.0 +- pT: + max: 846.0 + mid: 818.0 + min: 790.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 624100.0 - mid: 669124.0 - max: 715716.0 + mid: 1.25 + min: 1.0 +- pT: + max: 905.0 + mid: 875.5 + min: 846.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 715716.0 - mid: 766500.25 - max: 819025.0 + mid: 1.25 + min: 1.0 +- pT: + max: 967.0 + mid: 936.0 + min: 905.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 819025.0 - mid: 876096.0 - max: 935089.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1032.0 + mid: 999.5 + min: 967.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 935089.0 - mid: 999000.25 - max: 1065024.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1101.0 + mid: 1066.5 + min: 1032.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1065024.0 - mid: 1137422.25 - max: 1212201.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1172.0 + mid: 1136.5 + min: 1101.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1212201.0 - mid: 1291632.25 - max: 1373584.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1248.0 + mid: 1210.0 + min: 1172.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1373584.0 - mid: 1464100.0 - max: 1557504.0 + mid: 1.25 + min: 
1.0 +- pT: + max: 1327.0 + mid: 1287.5 + min: 1248.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1557504.0 - mid: 1657656.25 - max: 1760929.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1410.0 + mid: 1368.5 + min: 1327.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1760929.0 - mid: 1872792.25 - max: 1988100.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1497.0 + mid: 1453.5 + min: 1410.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 1988100.0 - mid: 2112662.25 - max: 2241009.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1588.0 + mid: 1542.5 + min: 1497.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 2241009.0 - mid: 2379306.25 - max: 2521744.0 + mid: 1.25 + min: 1.0 +- pT: + max: 1784.0 + mid: 1686.0 + min: 1588.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 2521744.0 - mid: 2842596.0 - max: 3182656.0 + mid: 1.25 + min: 1.0 +- pT: + max: 2116.0 + mid: 1950.0 + min: 1784.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.0 - mid: 1.25 + mid: 8000.0 + min: null + y: max: 1.5 - p_T2: - min: 3182656.0 - mid: 3802500.0 - max: 4477456.0 + mid: 1.25 + min: 1.0 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 + mid: 1.75 + min: 1.5 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 + mid: 1.75 + min: 1.5 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 1.75 + min: 1.5 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 1.75 + min: 1.5 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 1.75 + min: 1.5 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 1.75 + min: 1.5 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 1.75 + min: 1.5 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 1.75 + min: 1.5 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 1.75 + min: 1.5 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: 
null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 5476.0 - mid: 6241.0 - max: 7056.0 + mid: 1.75 + min: 1.5 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 7056.0 - mid: 8190.25 - max: 9409.0 + mid: 1.75 + min: 1.5 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 1.75 + min: 1.5 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 1.75 + min: 1.5 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 1.75 + min: 1.5 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 1.75 + min: 1.5 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 1.75 + min: 1.5 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 1.75 + min: 1.5 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 1.75 + min: 1.5 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 1.75 + min: 1.5 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 1.75 + min: 1.5 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 1.75 + min: 1.5 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 1.75 + min: 1.5 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 1.75 + min: 1.5 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 1.75 + min: 1.5 +- pT: + max: 468.0 + mid: 449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 
8000.0 + min: null + y: max: 2 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + mid: 1.75 + min: 1.5 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 1.75 + min: 1.5 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 1.75 + min: 1.5 +- pT: + max: 592.0 + mid: 570.0 + min: 548.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 300304.0 - mid: 324900.0 - max: 350464.0 + mid: 1.75 + min: 1.5 +- pT: + max: 638.0 + mid: 615.0 + min: 592.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 350464.0 - mid: 378225.0 - max: 407044.0 + mid: 1.75 + min: 1.5 +- pT: + max: 686.0 + mid: 662.0 + min: 638.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 407044.0 - mid: 438244.0 - max: 470596.0 + mid: 1.75 + min: 1.5 +- pT: + max: 737.0 + mid: 711.5 + min: 686.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 470596.0 - mid: 506232.25 - max: 543169.0 + mid: 1.75 + min: 1.5 +- pT: + max: 790.0 + mid: 763.5 + min: 737.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 543169.0 - mid: 582932.25 - max: 624100.0 + mid: 1.75 + min: 1.5 +- pT: + max: 846.0 + mid: 818.0 + min: 790.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 624100.0 - mid: 669124.0 - max: 715716.0 + mid: 1.75 + min: 1.5 +- pT: + max: 905.0 + mid: 875.5 + min: 846.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 715716.0 - mid: 766500.25 - max: 819025.0 + mid: 1.75 + min: 1.5 +- pT: + max: 967.0 + mid: 936.0 + min: 905.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 819025.0 - mid: 876096.0 - max: 935089.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1032.0 + mid: 999.5 + min: 967.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 935089.0 - mid: 999000.25 - max: 1065024.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1101.0 + mid: 1066.5 + min: 1032.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1065024.0 - mid: 1137422.25 - max: 1212201.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1172.0 + mid: 1136.5 + min: 1101.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1212201.0 - mid: 1291632.25 - max: 1373584.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1248.0 + mid: 1210.0 + min: 1172.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1373584.0 - mid: 1464100.0 - max: 1557504.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1327.0 + mid: 1287.5 + min: 1248.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + 
mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1557504.0 - mid: 1657656.25 - max: 1760929.0 + mid: 1.75 + min: 1.5 +- pT: + max: 1410.0 + mid: 1368.5 + min: 1327.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 1.5 - mid: 1.75 + mid: 8000.0 + min: null + y: max: 2 - p_T2: - min: 1760929.0 - mid: 1872792.25 - max: 1988100.0 + mid: 1.75 + min: 1.5 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 + mid: 2.25 + min: 2.0 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 + mid: 2.25 + min: 2.0 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 2.25 + min: 2.0 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 2.25 + min: 2.0 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 2.25 + min: 2.0 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 2.25 + min: 2.0 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 2.25 + min: 2.0 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 2.25 + min: 2.0 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 2.25 + min: 2.0 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 5476.0 - mid: 6241.0 - max: 7056.0 + mid: 2.25 + min: 2.0 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 7056.0 - mid: 8190.25 - max: 9409.0 + mid: 2.25 + min: 2.0 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 2.25 + min: 2.0 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 2.25 + min: 2.0 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 
2.25 + min: 2.0 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 2.25 + min: 2.0 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 2.25 + min: 2.0 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 2.25 + min: 2.0 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 2.25 + min: 2.0 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 2.25 + min: 2.0 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 2.25 + min: 2.0 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 2.25 + min: 2.0 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 2.25 + min: 2.0 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 2.25 + min: 2.0 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 2.25 + min: 2.0 +- pT: + max: 468.0 + mid: 449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + mid: 2.25 + min: 2.0 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 2.25 + min: 2.0 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 2.25 + min: 2.0 +- pT: + max: 592.0 + mid: 570.0 + min: 548.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 300304.0 - mid: 324900.0 - max: 350464.0 + mid: 2.25 + min: 2.0 +- pT: + max: 638.0 + mid: 615.0 + min: 592.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 350464.0 - mid: 378225.0 - max: 407044.0 + mid: 2.25 + min: 
2.0 +- pT: + max: 686.0 + mid: 662.0 + min: 638.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 407044.0 - mid: 438244.0 - max: 470596.0 + mid: 2.25 + min: 2.0 +- pT: + max: 737.0 + mid: 711.5 + min: 686.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 470596.0 - mid: 506232.25 - max: 543169.0 + mid: 2.25 + min: 2.0 +- pT: + max: 790.0 + mid: 763.5 + min: 737.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 543169.0 - mid: 582932.25 - max: 624100.0 + mid: 2.25 + min: 2.0 +- pT: + max: 846.0 + mid: 818.0 + min: 790.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 624100.0 - mid: 669124.0 - max: 715716.0 + mid: 2.25 + min: 2.0 +- pT: + max: 905.0 + mid: 875.5 + min: 846.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.0 - mid: 2.25 + mid: 8000.0 + min: null + y: max: 2.5 - p_T2: - min: 715716.0 - mid: 766500.25 - max: 819025.0 + mid: 2.25 + min: 2.0 +- pT: + max: 24.0 + mid: 22.5 + min: 21.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 - max: 3.0 - p_T2: - min: 441.0 - mid: 506.25 - max: 576.0 - sqrts: - min: null mid: 8000.0 - max: null -- y: - min: 2.5 - mid: 2.75 + min: null + y: max: 3.0 - p_T2: - min: 576.0 - mid: 676.0 - max: 784.0 + mid: 2.75 + min: 2.5 +- pT: + max: 28.0 + mid: 26.0 + min: 24.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 784.0 - mid: 900.0 - max: 1024.0 + mid: 2.75 + min: 2.5 +- pT: + max: 32.0 + mid: 30.0 + min: 28.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 1024.0 - mid: 1190.25 - max: 1369.0 + mid: 2.75 + min: 2.5 +- pT: + max: 37.0 + mid: 34.5 + min: 32.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 1369.0 - mid: 1600.0 - max: 1849.0 + mid: 2.75 + min: 2.5 +- pT: + max: 43.0 + mid: 40.0 + min: 37.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 1849.0 - mid: 2116.0 - max: 2401.0 + mid: 2.75 + min: 2.5 +- pT: + max: 49.0 + mid: 46.0 + min: 43.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 2401.0 - mid: 2756.25 - max: 3136.0 + mid: 2.75 + min: 2.5 +- pT: + max: 56.0 + mid: 52.5 + min: 49.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 3136.0 - mid: 3600.0 - max: 4096.0 + mid: 2.75 + min: 2.5 +- pT: + max: 64.0 + mid: 60.0 + min: 56.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 4096.0 - mid: 4761.0 - max: 5476.0 + mid: 2.75 + min: 2.5 +- pT: + max: 74.0 + mid: 69.0 + min: 64.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 5476.0 - mid: 6241.0 - max: 7056.0 + mid: 2.75 + min: 2.5 +- pT: + max: 84.0 + mid: 79.0 + min: 74.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 7056.0 - mid: 8190.25 - 
max: 9409.0 + mid: 2.75 + min: 2.5 +- pT: + max: 97.0 + mid: 90.5 + min: 84.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 9409.0 - mid: 11130.25 - max: 12996.0 + mid: 2.75 + min: 2.5 +- pT: + max: 114.0 + mid: 105.5 + min: 97.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 12996.0 - mid: 15252.25 - max: 17689.0 + mid: 2.75 + min: 2.5 +- pT: + max: 133.0 + mid: 123.5 + min: 114.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 17689.0 - mid: 20449.0 - max: 23409.0 + mid: 2.75 + min: 2.5 +- pT: + max: 153.0 + mid: 143.0 + min: 133.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 23409.0 - mid: 26732.25 - max: 30276.0 + mid: 2.75 + min: 2.5 +- pT: + max: 174.0 + mid: 163.5 + min: 153.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 30276.0 - mid: 34225.0 - max: 38416.0 + mid: 2.75 + min: 2.5 +- pT: + max: 196.0 + mid: 185.0 + min: 174.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 38416.0 - mid: 43264.0 - max: 48400.0 + mid: 2.75 + min: 2.5 +- pT: + max: 220.0 + mid: 208.0 + min: 196.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 48400.0 - mid: 54056.25 - max: 60025.0 + mid: 2.75 + min: 2.5 +- pT: + max: 245.0 + mid: 232.5 + min: 220.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 60025.0 - mid: 66822.25 - max: 73984.0 + mid: 2.75 + min: 2.5 +- pT: + max: 272.0 + mid: 258.5 + min: 245.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 73984.0 - mid: 81796.0 - max: 90000.0 + mid: 2.75 + min: 2.5 +- pT: + max: 300.0 + mid: 286.0 + min: 272.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 90000.0 - mid: 99225.0 - max: 108900.0 + mid: 2.75 + min: 2.5 +- pT: + max: 330.0 + mid: 315.0 + min: 300.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 108900.0 - mid: 119716.0 - max: 131044.0 + mid: 2.75 + min: 2.5 +- pT: + max: 362.0 + mid: 346.0 + min: 330.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 131044.0 - mid: 143262.25 - max: 156025.0 + mid: 2.75 + min: 2.5 +- pT: + max: 395.0 + mid: 378.5 + min: 362.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 156025.0 - mid: 170156.25 - max: 184900.0 + mid: 2.75 + min: 2.5 +- pT: + max: 430.0 + mid: 412.5 + min: 395.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 184900.0 - mid: 201601.0 - max: 219024.0 + mid: 2.75 + min: 2.5 +- pT: + max: 468.0 + mid: 449.0 + min: 430.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 219024.0 - mid: 237656.25 - max: 257049.0 + mid: 2.75 + 
min: 2.5 +- pT: + max: 507.0 + mid: 487.5 + min: 468.0 sqrts: - min: null - mid: 8000.0 max: null -- y: - min: 2.5 - mid: 2.75 + mid: 8000.0 + min: null + y: max: 3.0 - p_T2: - min: 257049.0 - mid: 278256.25 - max: 300304.0 + mid: 2.75 + min: 2.5 +- pT: + max: 548.0 + mid: 527.5 + min: 507.0 sqrts: - min: null - mid: 8000.0 max: null + mid: 8000.0 + min: null + y: + max: 3.0 + mid: 2.75 + min: 2.5 diff --git a/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/metadata.yaml index 30a37eb43f..8cf95b0027 100644 --- a/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/metadata.yaml +++ b/validphys2/src/validphys/datafiles/new_commondata/CMS_1JET_8TEV/metadata.yaml @@ -42,7 +42,7 @@ implemented_observables: description: jet rapidity label: $|y|$ units: '' - p_T2: + pT: description: jet transverse momentum label: $k_{T}$ units: GeV @@ -73,11 +73,11 @@ implemented_observables: operation: 'null' plotting: dataset_label: CMS jets 8 TeV - kinematics_override: jet_sqrt_scale + kinematics_override: identity plot_x: y figure_by: - - p_T2 + - pT kinematic_coverage: - y - - p_T2 + - pT - sqrts diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/data_legacy_ASY.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/data_legacy_ASY.yaml similarity index 100% rename from validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/data_legacy_ASY.yaml rename to validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/data_legacy_ASY.yaml diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/kinematics_ASY.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/kinematics_ASY.yaml similarity index 100% rename from validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/kinematics_ASY.yaml rename to validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/kinematics_ASY.yaml diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/metadata.yaml similarity index 98% rename from validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/metadata.yaml rename to validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/metadata.yaml index 5b88502b4a..85de69d401 100644 --- a/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/metadata.yaml +++ b/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/metadata.yaml @@ -1,4 +1,4 @@ -setname: D0_WPWM_1P96GEV +setname: D0_WPWM_1P96TEV version: 1 version_comment: Port of old commondata nnpdf_metadata: diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/uncertainties_legacy_ASY.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/uncertainties_legacy_ASY.yaml similarity index 100% rename from validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96GEV/uncertainties_legacy_ASY.yaml rename to validphys2/src/validphys/datafiles/new_commondata/D0_WPWM_1P96TEV/uncertainties_legacy_ASY.yaml diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/data_legacy_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/data_legacy_ZRAP.yaml similarity index 100% rename from validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/data_legacy_ZRAP.yaml rename to 
validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/data_legacy_ZRAP.yaml
diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/kinematics_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/kinematics_ZRAP.yaml
similarity index 100%
rename from validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/kinematics_ZRAP.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/kinematics_ZRAP.yaml
diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/metadata.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/metadata.yaml
similarity index 98%
rename from validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/metadata.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/metadata.yaml
index 2dc7068283..0f1c84e620 100644
--- a/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/metadata.yaml
+++ b/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/metadata.yaml
@@ -1,4 +1,4 @@
-setname: D0_Z0_1P96GEV
+setname: D0_Z0_1P96TEV
 version: 1
 version_comment: Port of old commondata
 nnpdf_metadata:
diff --git a/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/uncertainties_legacy_ZRAP.yaml b/validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/uncertainties_legacy_ZRAP.yaml
similarity index 100%
rename from validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96GEV/uncertainties_legacy_ZRAP.yaml
rename to validphys2/src/validphys/datafiles/new_commondata/D0_Z0_1P96TEV/uncertainties_legacy_ZRAP.yaml
diff --git a/validphys2/src/validphys/datafiles/new_commondata/dataset_names.yml b/validphys2/src/validphys/datafiles/new_commondata/dataset_names.yml
index c8308514e8..827617ca10 100644
--- a/validphys2/src/validphys/datafiles/new_commondata/dataset_names.yml
+++ b/validphys2/src/validphys/datafiles/new_commondata/dataset_names.yml
@@ -270,13 +270,13 @@ BCDMSP_dwsh:
   dataset: BCDMS_NC_NOTFIXED_P_DW_EM-F2
   variant: legacy
 CDFZRAP_NEW:
-  dataset: CDF_Z0_1P96GEV_ZRAP
+  dataset: CDF_Z0_1P96TEV_ZRAP
   variant: legacy
 D0WMASY:
-  dataset: D0_WPWM_1P96GEV_ASY
+  dataset: D0_WPWM_1P96TEV_ASY
   variant: legacy
 D0ZRAP_40:
-  dataset: D0_Z0_1P96GEV_ZRAP
+  dataset: D0_Z0_1P96TEV_ZRAP
   variant: legacy
 CMSWCHARMRAT:
   dataset: CMS_WCHARM_7TEV_WPWM-RATIO
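The `dataset_names.yml` mapping above is what lets existing runcards keep using legacy dataset names: each old name points to a new-format dataset plus a variant. A minimal sketch of such a lookup, assuming only the file layout shown in the hunk; `translate_legacy_name` is a hypothetical helper for illustration, not part of validphys:

```python
# Hypothetical helper, for illustration only: resolve a legacy dataset name
# through a mapping file with the layout shown in the hunk above.
import yaml

def translate_legacy_name(old_name, mapping_file="dataset_names.yml"):
    """Return the (new dataset name, variant) pair for a legacy name."""
    with open(mapping_file, encoding="utf-8") as f:
        mapping = yaml.safe_load(f)
    entry = mapping[old_name]
    # Entries shown above carry a dataset and, optionally, a variant
    return entry["dataset"], entry.get("variant")

# e.g. translate_legacy_name("D0ZRAP_40") == ("D0_Z0_1P96TEV_ZRAP", "legacy")
```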
"EWK_MLL": ("M_ll", "M_ll2", "sqrts"), @@ -45,6 +46,42 @@ } +def _get_kinlabel_process_type(process_type): + """Get KIN_LABEL from the dictionary above according + to the process type + This requires some extra digestion for DIS + """ + if isinstance(process_type, str): + process_type = PROCESSES.get(process_type.upper(), process_type.upper()) + if hasattr(process_type, "accepted_variables"): + return process_type.accepted_variables + process_type = str(process_type) + if process_type[:3] == "DIS": + return KIN_LABEL["DIS"] + return KIN_LABEL[process_type] + + +# TODO: in the new commondata instead of having this, let's always use the same +# variables +def _variable_understanding(variables_raw, process_vars): + """Given a set of variable, check whether it might be a variation of existing + variables for a process type""" + variables = [i for i in variables_raw] + + def substitute(pr_v, cd_x): + if pr_v in process_vars and cd_x in variables: + variables[variables.index(cd_x)] = pr_v + + substitute("eta", "y") + substitute("eta", "eta") + substitute("etay", "eta") + substitute("etay", "y") + substitute("yQQ", "y_ttBar") + substitute("yQ", "y_t") + + return variables + + class RuleProcessingError(Exception): """Exception raised when we couldn't process a rule.""" @@ -230,13 +267,17 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, data closure_data = level0_commondata_wc(data, fakepdf) + # Keep track of the original commondata, since it is what will be used to export + # the data afterwards + all_raw_commondata = {} + for dataset in data.datasets: # == print number of points passing cuts, make dataset directory and write FKMASK ==# path = filter_path / dataset.name nfull, ncut = _write_ds_cut_data(path, dataset) - make_dataset_dir(path / "systypes") total_data_points += nfull total_cut_data_points += ncut + all_raw_commondata[dataset.name] = dataset.commondata.load() if fakenoise: # ======= Level 1 closure test =======# @@ -250,10 +291,21 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, data log.info("Writing Level0 data") for cd in closure_data: - path_cd = filter_path / cd.setname / f"DATA_{cd.setname}.dat" - path_sys = filter_path / cd.setname / "systypes" / f"SYSTYPE_{cd.setname}_DEFAULT.dat" - write_commondata_to_file(commondata=cd, path=path_cd) - write_systype_to_file(commondata=cd, path=path_sys) + # Write the full dataset, not only the points that pass the filter + data_path, unc_path = generate_path_filtered_data(filter_path.parent, cd.setname) + data_path.parent.mkdir(exist_ok=True, parents=True) + + raw_cd = all_raw_commondata[cd.setname] + + data_range = np.arange(1, 1 + raw_cd.ndata) + + # Now put the closure data into the raw original commondata + new_cv = cd.central_values.reindex(data_range, fill_value=0.0).values + output_cd = raw_cd.with_central_value(new_cv) + + # And export it to file + output_cd.export_data(data_path.open("w", encoding="utf-8")) + output_cd.export_uncertainties(unc_path.open("w", encoding="utf-8")) return total_data_points, total_cut_data_points @@ -368,6 +420,12 @@ class Rule: A rule object is created for each rule in ./cuts/filters.yaml + Old commondata relied on the order of the kinematical variables + to be the same as specified in the `KIN_LABEL` dictionary set in this module. + The new commondata specification instead defines explicitly the name of the + variables in the metadata. 
@@ -368,6 +420,12 @@ class Rule:
     A rule object is created for each rule in ./cuts/filters.yaml
 
+    Old commondata relied on the order of the kinematical variables
+    to be the same as specified in the `KIN_LABEL` dictionary set in this module.
+    The new commondata specification instead defines explicitly the name of the
+    variables in the metadata.
+    Therefore, when using a new-format commondata, the KIN_LABEL dictionary
+    will not be used and the variables defined in the metadata are used instead.
 
     Parameters
     ----------
@@ -411,19 +469,22 @@ def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dic
             if loader is None:
                 loader = Loader()
+
             try:
                 cd = loader.check_commondata(self.dataset)
             except LoaderError as e:
                 raise RuleProcessingError(f"Could not find dataset {self.dataset}") from e
-            if cd.process_type[:3] == "DIS":
-                self.variables = KIN_LABEL["DIS"]
+
+            if cd.legacy:
+                self.variables = _get_kinlabel_process_type(cd.process_type)
             else:
-                self.variables = KIN_LABEL[cd.process_type]
+                if cd.metadata.is_ported_dataset:
+                    self.variables = _get_kinlabel_process_type(cd.process_type)
+                else:
+                    self.variables = cd.metadata.kinematic_coverage
         else:
-            if self.process_type[:3] == "DIS":
-                self.variables = KIN_LABEL["DIS"]
-            else:
-                self.variables = KIN_LABEL[self.process_type]
+            self.variables = _get_kinlabel_process_type(self.process_type)
+
         # TODO: for now this will be a string within this class
         if hasattr(self, "local_variables"):
             if not isinstance(self.local_variables, Mapping):
@@ -471,6 +532,10 @@ def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dic
                     f"Could not process rule {self.rule_string!r}: Unknown name {name!r}"
                 )
 
+        # Before returning, set the process type as a string for the rest of the filter
+        if self.process_type is not None:
+            self.process_type = str(self.process_type)
+
     @property
     def _properties(self):
         """Attributes of the Rule class that are defining. Two
@@ -498,7 +563,7 @@
         # is different to the case where the rule does apply,
         # but the point was cut out by the rule.
         if (
-            dataset.setname != self.dataset
+            (dataset.setname != self.dataset and dataset.legacy_name != self.dataset)
             and process_name != self.process_type
             and self.process_type != "DIS_ALL"
         ):
@@ -531,8 +596,21 @@ def __repr__(self):  # pragma: no cover
 
     def _make_kinematics_dict(self, dataset, idat) -> dict:
         """Fill in a dictionary with the kinematics for each point"""
+        # TODO
+        # When applying a "process-type" rule the variables are as given
+        # at the top of the module. However, for new commondata it is important
+        # that the variables come in the right order.
+        # This "understanding" should not be necessary and the process-variable
+        # mapping in this module should only serve to check which variables are allowed.
         kinematics = dataset.kinematics.values[idat]
-        return dict(zip(self.variables, kinematics))
+        if dataset.legacy or "k1" in dataset.kin_variables:
+            # For ported datasets, the naming is k1/k2/k3 and
+            # thus it needs to rely on the process
+            return dict(zip(self.variables, kinematics))
+
+        # Use the order of the commondata and the syntax of KIN_LABEL
+        new_vars = _variable_understanding(dataset.kin_variables, self.variables)
+        return dict(zip(new_vars, kinematics))
 
     def _make_point_namespace(self, dataset, idat) -> dict:
         """Return a dictionary with kinematics and local
diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py
index 40c8197a11..d87138a2e2 100644
--- a/validphys2/src/validphys/fitdata.py
+++ b/validphys2/src/validphys/fitdata.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Utilities for loading data from fit folders
 """
@@ -18,7 +17,7 @@
 from reportengine.table import table
 from validphys import checks
 from validphys.core import PDF
-from validphys.plotoptions import get_info
+from validphys.plotoptions.core import get_info
 
 # TODO: Add more stuff here as needed for postfit
 LITERAL_FILES = ['chi2exps.log']
@@ -33,7 +32,7 @@ def num_fitted_replicas(fit):
     """Function to obtain the number of nnfit replicas. That is the number
     of replicas before postfit was run.
     """
-    with open(fit.path / "postfit" / "veto_count.json", 'r') as stream:
+    with open(fit.path / "postfit" / "veto_count.json") as stream:
         veto = json.load(stream)
     # In principle we could use any of the other keys
     return len(veto["Positivity"])
@@ -276,7 +275,7 @@ def match_datasets_by_name(fits, fits_datasets):
     the corresponding dataset included only in the first fit and only in the
     second fit."""
-    firstds, secondds = [{ds.name: ds for ds in datasets} for datasets in fits_datasets]
+    firstds, secondds = ({ds.name: ds for ds in datasets} for datasets in fits_datasets)
     common_keys = firstds.keys() & secondds.keys()
     first_keys = firstds.keys() - secondds.keys()
     seccond_keys = secondds.keys() - firstds.keys()
@@ -365,12 +364,8 @@ def test_for_same_cuts(fits, match_datasets_by_name):
         else:
             c2 = np.arange(second.commondata.ndata)
         if not np.array_equal(c1, c2):
-            msg = "Cuts for %s are not the same:\n%s:\n%s\n\n%s:\n%s" % (
-                ds,
-                first_fit,
-                c1,
-                second_fit,
-                c2,
+            msg = "Cuts for {} are not the same:\n{}:\n{}\n\n{}:\n{}".format(
+                ds, first_fit, c1, second_fit, c2
             )
             log.info(msg)
             res.append((first, second))
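The one-line change in `match_datasets_by_name` swaps a list comprehension for a generator expression; the set arithmetic it feeds is unchanged. A self-contained illustration of that dict-view arithmetic (the dataset names here are invented):

```python
# Dict views support & and - directly, which is what match_datasets_by_name uses.
first = {"NMC": "spec1", "SLAC": "spec2"}
second = {"NMC": "spec3", "CMS": "spec4"}

assert first.keys() & second.keys() == {"NMC"}    # datasets common to both fits
assert first.keys() - second.keys() == {"SLAC"}   # only in the first fit
assert second.keys() - first.keys() == {"CMS"}    # only in the second fit
```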
diff --git a/validphys2/src/validphys/kinematics.py b/validphys2/src/validphys/kinematics.py
index 295bcd30fb..28c0217ffa 100644
--- a/validphys2/src/validphys/kinematics.py
+++ b/validphys2/src/validphys/kinematics.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Provides information on the kinematics involved in the data.
@@ -13,8 +12,8 @@
 from reportengine import collect
 from reportengine.checks import check_positive
 from reportengine.table import table
-from validphys import plotoptions
 from validphys.core import CutsPolicy
+from validphys.plotoptions import core as plotoptions_core
 
 log = logging.getLogger(__name__)
 
@@ -29,7 +28,7 @@ def describe_kinematics(commondata, titlelevel: int = 1):
     import inspect
 
     cd = commondata
-    info = plotoptions.get_info(cd)
+    info = plotoptions_core.get_info(cd)
     proc = cd.load_commondata().commondataproc
     src = inspect.getsource(info.kinematics_override.xq2map)
     titlespec = '#' * titlelevel
@@ -72,9 +71,9 @@ def kinlimits(commondata, cuts, use_cuts, use_kinoverride: bool = True):
     be ignored and the kinematics will be interpreted based on the process type only.
     If use_cuts is 'CutsPolicy.NOCUTS', the information on the total number of
     points will be displayed, instead of the fitted ones."""
-    info = plotoptions.get_info(commondata, cuts=None, use_plotfiles=use_kinoverride)
+    info = plotoptions_core.get_info(commondata, cuts=None, use_plotfiles=use_kinoverride)
 
-    kintable = plotoptions.kitable(commondata, info)
+    kintable = plotoptions_core.kitable(commondata, info)
     ndata = len(kintable)
     if cuts:
         kintable = kintable.loc[cuts.load()]
@@ -143,7 +142,10 @@ def all_commondata_grouping(all_commondata, metadata_group):
     records = []
     for cd in all_commondata:
         records.append(
-            {'dataset': str(cd), metadata_group: getattr(plotoptions.get_info(cd), metadata_group)}
+            {
+                'dataset': str(cd),
+                metadata_group: getattr(plotoptions_core.get_info(cd), metadata_group),
+            }
         )
     df = pd.DataFrame.from_records(records, index='dataset')
     # sort first by grouping alphabetically and then dataset name
@@ -163,28 +165,25 @@ def xq2map_with_cuts(commondata, cuts, group_name=None):
     """Return two (x,Q²) tuples: one for the fitted data and one for the
     cut data. If `display_cuts` is false or all data passes the cuts, the
     second tuple will be empty."""
-    info = plotoptions.get_info(commondata)
-    kintable = plotoptions.kitable(commondata, info)
+    info = plotoptions_core.get_info(commondata)
+    kintable = plotoptions_core.kitable(commondata, info)
     if cuts:
         mask = cuts.load()
         boolmask = np.zeros(len(kintable), dtype=bool)
         boolmask[mask] = True
         fitted_kintable = kintable.loc[boolmask]
         masked_kitable = kintable.loc[~boolmask]
-        xq2fitted = plotoptions.get_xq2map(fitted_kintable, info)
-        xq2masked = plotoptions.get_xq2map(masked_kitable, info)
-        return XQ2Map(info.experiment, commondata, xq2fitted, xq2masked, group_name)
-    fitted_kintable = plotoptions.get_xq2map(kintable, info)
-    empty = (np.array([]), np.array([]))
-    return XQ2Map(info.experiment, commondata, fitted_kintable, empty, group_name)
+        xq2fitted = plotoptions_core.get_xq2map(fitted_kintable, info)
+        xq2masked = plotoptions_core.get_xq2map(masked_kitable, info)
+    else:
+        xq2fitted = plotoptions_core.get_xq2map(kintable, info)
+        xq2masked = (np.array([]), np.array([]))
+
+    return XQ2Map(info.experiment, commondata, xq2fitted, xq2masked, group_name)
 
 
 dataset_inputs_by_groups_xq2map = collect(
-    xq2map_with_cuts,
-    (
-        'group_dataset_inputs_by_metadata',
-        'data_input',
-    ),
+    xq2map_with_cuts, ('group_dataset_inputs_by_metadata', 'data_input')
 )
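The refactored `xq2map_with_cuts` now builds both branches around the same boolean mask and constructs a single `XQ2Map` at the end. A toy version of that masking step, with made-up data:

```python
import numpy as np
import pandas as pd

kintable = pd.DataFrame({"x": [0.1, 0.2, 0.3, 0.4]})
mask = [0, 2]  # indices that pass the cuts, as cuts.load() would return

boolmask = np.zeros(len(kintable), dtype=bool)
boolmask[mask] = True
fitted_kintable = kintable.loc[boolmask]   # rows 0 and 2
masked_kintable = kintable.loc[~boolmask]  # rows 1 and 3
```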
""" - info = plotoptions.get_info(commondata, cuts=cuts) - res = plotoptions.kitable(commondata, info, cuts=cuts) + info = plotoptions_core.get_info(commondata, cuts=cuts) + res = plotoptions_core.kitable(commondata, info, cuts=cuts) res.columns = [*info.kinlabels, *res.columns[3:]] if not show_extra_labels: res = res.iloc[:, :3] diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index bc8ccd155e..c1093febe0 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Resolve paths to useful objects, and query the existence of different resources within the specified paths. @@ -22,7 +21,8 @@ from reportengine import filefinder from reportengine.compat import yaml -from validphys import lhaindex, pineparser +from validphys import lhaindex +from validphys.commondataparser import load_commondata_old, parse_new_metadata from validphys.core import ( PDF, CommonDataSpec, @@ -39,8 +39,8 @@ TheoryIDSpec, peek_commondata_metadata, ) -from validphys.datafiles import path_vpdata -from validphys.utils import tempfile_cleaner +from validphys.datafiles import legacy_to_new_mapping, path_vpdata +from validphys.utils import generate_path_filtered_data, tempfile_cleaner log = logging.getLogger(__name__) NNPDF_DIR = "NNPDF" @@ -78,6 +78,10 @@ class TheoryNotFound(LoadFailedError): pass +class TheoryMetadataNotFound(LoadFailedError): + pass + + class TheoryDataBaseNotFound(LoadFailedError): pass @@ -148,7 +152,7 @@ def _get_nnpdf_profile(profile_path=None): profile_path = config_nnprofile if profile_path is not None: - with open(profile_path, "r", encoding="utf-8") as f: + with open(profile_path, encoding="utf-8") as f: profile_entries = yaml_reader.load(f) if profile_entries is not None: profile_dict.update(profile_entries) @@ -189,6 +193,37 @@ def _get_nnpdf_profile(profile_path=None): return profile_dict +def _use_fit_commondata_old_format_to_new_format(setname, file_path): + """Reads an old commondata written in the old format + (e.g., a closure test ran for NNPDF4.0) and creates a new-format version + in a temporary folder to be read by the commondata. + Note that this does not modify the fit""" + if not file_path.exists(): + raise DataNotFoundError(f"Data for {setname} at {file_path} not found") + + # Try loading the data from file_path, using the systypes from there + # although they are not used + systypes = next(file_path.parent.glob("systypes/*.dat")) + commondata = load_commondata_old(file_path, systypes, setname) + + # Export the data central + new_data_stream = tempfile.NamedTemporaryFile( + delete=False, prefix=f"filter_{setname}_data", suffix=".yaml", mode="w" + ) + commondata.export_data(new_data_stream) + new_data_stream.close() + data_path = pathlib.Path(new_data_stream.name) + + # Export the uncertainties + new_unc_stream = tempfile.NamedTemporaryFile( + delete=False, prefix=f"filter_{setname}_uncertainties", suffix=".yaml", mode="w" + ) + commondata.export_uncertainties(new_data_stream) + new_unc_stream.close() + unc_path = pathlib.Path(new_data_stream.name) + return data_path, unc_path + + class LoaderBase: """ Base class for the NNPDF loader. 
@@ -217,7 +252,7 @@ def __init__(self, profile=None):
         self.datapath = datapath
         self._theories_path = theories_path
         self.resultspath = resultspath
-        self._old_commondata_fits = set()
+        self._extremely_old_fits = set()
         self.nnprofile = profile

     @property
@@ -238,64 +273,6 @@ def _vp_cache(self):
         return vpcache


-def rebuild_commondata_without_cuts(filename_with_cuts, cuts, datapath_filename, newpath):
-    """Take a CommonData file that is stored with the cuts applied
-    and write another file with no cuts. The points that were not present in
-    the original file have the same kinematics as the file in
-    ``datapath_filename``, which must correspond to the original CommonData
-    file which does not have the cuts applied. However, to avoid confusion, the
-    values and uncertainties are all set to zero. The new file is written
-    to ``newpath``.
-    """
-
-    metadata = peek_commondata_metadata(datapath_filename)
-    if cuts is None:
-        shutil.copy2(filename_with_cuts, newpath)
-        return
-
-    index_pattern = re.compile(r'(?P<startspace>\s*)(?P<index>\d+)')
-    data_line_pattern = re.compile(
-        r'\s*(?P<index>\d+)' r'\s+(?P<process_type>\S+)\s+' r'(?P<kinematics>(\s*\S+){3})\s+'
-    )
-    mask = cuts.load()
-    maskiter = iter(mask)
-    ndata = metadata.ndata
-    nsys = metadata.nsys
-
-    next_index = next(maskiter)
-    with open(filename_with_cuts, 'r') as fitfile, open(datapath_filename) as dtfile, open(
-        newpath, 'w'
-    ) as newfile:
-        newfile.write(dtfile.readline())
-        # discard this line
-        fitfile.readline()
-        for i in range(1, ndata + 1):
-            # You gotta love mismatched indexing
-            if i - 1 == next_index:
-                line = fitfile.readline()
-                line = re.sub(index_pattern, rf'\g<startspace>{i}', line, count=1)
-                newfile.write(line)
-                next_index = next(maskiter, None)
-                # drop the data file line
-                dtfile.readline()
-            else:
-                line = dtfile.readline()
-                # check that we know where we are
-                m = re.match(index_pattern, line)
-                assert int(m.group('index')) == i
-                # We have index, process type, and 3*kinematics
-                # that we would like to keep.
-                m = re.match(data_line_pattern, line)
-                newfile.write(line[: m.end()])
-                # And value, stat, *sys that we want to drop
-                # Do not use string join to keep up with the ugly format
-                # This should really be nan's, but the c++ streams that could read this
-                # do not have the right interface.
-                # https://stackoverflow.com/questions/11420263/is-it-possible-to-read-infinity-or-nan-values-using-input-streams
-                zeros = '-0\t' * (2 + 2 * nsys)
-                newfile.write(f'{zeros}\n')
-
-
 # TODO: Deprecate get methods?
 class Loader(LoaderBase):
     """Load various resources from the NNPDF data path."""

@@ -326,15 +303,29 @@ def available_theories(self):

     @property
     @functools.lru_cache()
-    def available_datasets(self):
+    def _available_old_datasets(self):
+        """Provide all old-format datasets,
+        i.e., those found as DATA_*.dat files in the old commondata folder
+        """
         data_str = "DATA_"
-        # We filter out the positivity sets here
+        old_commondata_folder = self.commondata_folder.with_name("commondata")
+        # We filter out the positivity and integrability sets here
         return {
             file.stem[len(data_str) :]
-            for file in self.commondata_folder.glob(f'{data_str}*.dat')
+            for file in old_commondata_folder.glob(f'{data_str}*.dat')
             if not file.stem.startswith((f"{data_str}POS", f"{data_str}INTEG"))
         }

+    @property
+    @functools.lru_cache()
+    def available_datasets(self):
+        """Provide all available datasets other than positivity and integrability.
+        At the moment this only returns old datasets for which a translation is available
+        """
+        skip = ("POS", "INTEG")
+        old_datasets = [i for i in legacy_to_new_mapping.keys() if not i.startswith(skip)]
+        return set(old_datasets)
+
     @property
     @functools.lru_cache()
     def available_pdfs(self):
@@ -342,48 +333,124 @@ def available_pdfs(self):

     @property
     def commondata_folder(self):
-        return self.datapath / 'commondata'
+        return self.datapath / 'new_commondata'
+
+    def _use_fit_commondata_old_format_to_old_format(self, basedata, fit):
+        """Load pseudodata from a fit where the data was generated in the old format
+        and for which no new-format version exists.
+        """
+        # TODO: deprecated, will be removed
+        setname = basedata.name
+        log.warning(f"Please update {basedata} to the new format to keep using it")
+        datafilefolder = (fit.path / 'filter') / setname
+        data_path = datafilefolder / f'FILTER_{setname}.dat'
+
+        if not data_path.exists():
+            oldpath = datafilefolder / f'DATA_{setname}.dat'
+            if not oldpath.exists():
+                raise DataNotFoundError(f"{data_path} is needed with `use_fitcommondata`")
+
+            raise DataNotFoundError(
+                f"""This data format: {oldpath} is no longer supported
In order to upgrade it you need to use the script `vp-rebuild-data` with a version of NNPDF < 4.0.9"""
+            )
+        return data_path
+
+    def check_commondata(
+        self,
+        setname,
+        sysnum=None,
+        use_fitcommondata=False,
+        fit=None,
+        variant=None,
+        force_old_format=False,
+    ):
+        """Prepare the commondata files to be loaded.
+        A commondata is defined by its name (``setname``) and its variant (``variant``).
+
+        At the moment both old-format and new-format commondata can be utilized and loaded,
+        however old-format commondata are deprecated and will be removed in future releases.
+
+        The function ``parse_dataset_input`` in ``config.py`` translates all known old commondata
+        into their new names (and variants),
+        therefore this function should only receive requests for the new format.
+
+        Any action trying to request an old-format commondata from this function will log
+        an error message. This error message will eventually become an actual error.
+        """
+        datafile = None
+        metadata_path = None
+        old_commondata_folder = self.commondata_folder.with_name("commondata")

-    def check_commondata(self, setname, sysnum=None, use_fitcommondata=False, fit=None):
         if use_fitcommondata:
             if not fit:
                 raise LoadFailedError("Must specify a fit when setting use_fitcommondata")
-            datafilefolder = (fit.path / 'filter') / setname
-            newpath = datafilefolder / f'FILTER_{setname}.dat'
-            if not newpath.exists():
-                oldpath = datafilefolder / f'DATA_{setname}.dat'
-                if not oldpath.exists():
-                    raise DataNotFoundError(
-                        f"Either {newpath} or {oldpath} are needed with `use_fitcommondata`"
-                    )
-                # This is to not repeat all the error handling stuff
-                basedata = self.check_commondata(setname, sysnum=sysnum)
-                basedata_path = basedata.datafile
-                cuts = self.check_fit_cuts(basedata, fit=fit)
-
-                if fit not in self._old_commondata_fits:
-                    self._old_commondata_fits.add(fit)
-                    log.warning(
-                        f"Found fit using old commondata export settings: "
-                        f"'{fit}'. The commondata that are used in this run "
-                        "will be updated now."
-                        "Please consider re-uploading it."
-                    )
-                    log.warning("Points that do not pass the cuts are set to zero!")
-
-                log.info(f"Upgrading filtered commondata. Writing {newpath}")
Writing {newpath}") - rebuild_commondata_without_cuts(oldpath, cuts, basedata_path, newpath) - datafile = newpath - else: - datafile = self.commondata_folder / f'DATA_{setname}.dat' + # Using commondata generated with a previous fit requires some branching since it depends on + # 1. Whether the data is now in the new commondata + # 2. Whether the data was in the old format when it was generated + + # First, load the base commondata which will be used as container and to check point 1 + basedata = self.check_commondata( + setname, variant=variant, force_old_format=force_old_format, sysnum=sysnum + ) + # and the possible filename for the new data + data_path, unc_path = generate_path_filtered_data(fit.path, setname) + + # If this is a legacy set, by definition the data that was written can only be legacy + if basedata.legacy: + data_path = self._use_fit_commondata_old_format_to_old_format(basedata, fit) + elif not data_path.exists(): + # If the data path does not exist, we might be dealing with data generated with + # the old name, translate the csv into a yaml file that the paraser can understand + legacy_name = basedata.legacy_name + old_path = fit.path / "filter" / legacy_name / f"FILTER_{legacy_name}.dat" + data_path, unc_path = _use_fit_commondata_old_format_to_new_format( + setname, old_path + ) + + return basedata.with_modified_data(data_path, uncertainties_file=unc_path) + + # Get data folder and observable name and check for existence + try: + if not force_old_format: + setfolder, observable_name = setname.rsplit("_", 1) + metadata_path = self.commondata_folder / setfolder / "metadata.yaml" + force_old_format = not metadata_path.exists() + except ValueError: + log.warning(f"Error trying to read {setname}, falling back to the old format reader") + force_old_format = True + + if not force_old_format: + # Get the instance of ObservableMetaData + try: + metadata = parse_new_metadata(metadata_path, observable_name, variant=variant) + return CommonDataSpec(setname, metadata) + except ValueError as e: + # Before failure, check whetehr this might be an old dataset + datafile = old_commondata_folder / f"DATA_{setname}.dat" + if not datafile.exists(): + raise e + + force_old_format = True + metadata_path = None + + # Eventually the error log will be replaced by the commented execption + log.error( + f"Trying to read {setname} in the old format. Note that this is deprecated and will be removed in future releases" + ) + + # Everything below is deprecated and will be removed in future releases + if datafile is None: + datafile = old_commondata_folder / f"DATA_{setname}.dat" + if not datafile.exists(): raise DataNotFoundError( - ("Could not find Commondata set: '%s'. 
" "File '%s' does not exist.") - % (setname, datafile) + f"No .dat file found for {setname} and no new data translation found" ) + if sysnum is None: sysnum = 'DEFAULT' - sysfile = self.commondata_folder / 'systypes' / ('SYSTYPE_%s_%s.dat' % (setname, sysnum)) + sysfile = old_commondata_folder / "systypes" / f"SYSTYPE_{setname}_{sysnum}.dat" if not sysfile.exists(): raise SysNotFoundError( @@ -394,13 +461,13 @@ def check_commondata(self, setname, sysnum=None, use_fitcommondata=False, fit=No plotfiles = [] metadata = peek_commondata_metadata(datafile) - process_plotting_root = self.commondata_folder / f'PLOTTINGTYPE_{metadata.process_type}' + process_plotting_root = old_commondata_folder / f'PLOTTINGTYPE_{metadata.process_type}' type_plotting = ( process_plotting_root.with_suffix('.yml'), process_plotting_root.with_suffix('.yaml'), ) - data_plotting_root = self.commondata_folder / f'PLOTTING_{setname}' + data_plotting_root = old_commondata_folder / f'PLOTTING_{setname}' data_plotting = ( data_plotting_root.with_suffix('.yml'), @@ -416,7 +483,10 @@ def check_commondata(self, setname, sysnum=None, use_fitcommondata=False, fit=No f"The name found in the CommonData file, {metadata.name}, did " f"not match the dataset name, {setname}." ) - return CommonDataSpec(datafile, sysfile, plotfiles, name=setname, metadata=metadata) + + return CommonDataSpec( + setname, metadata, legacy=True, datafile=datafile, sysfile=sysfile, plotfiles=plotfiles + ) @functools.lru_cache() def check_theoryID(self, theoryID): @@ -424,7 +494,7 @@ def check_theoryID(self, theoryID): theopath = self._theories_path / f"theory_{theoryID}" if not theopath.exists(): raise TheoryNotFound( - "Could not find theory %s. Folder '%s' not found" % (theoryID, theopath) + "Could not find theory {}. Folder '{}' not found".format(theoryID, theopath) ) return TheoryIDSpec(theoryID, theopath, self.theorydb_file) @@ -438,6 +508,8 @@ def theorydb_file(self): def get_commondata(self, setname, sysnum): """Get a Commondata from the set name and number.""" + # TODO: check where this is used + # as this might ignore cfactors or variants cd = self.check_commondata(setname, sysnum) return cd.load() @@ -447,40 +519,35 @@ def check_fktable(self, theoryID, setname, cfac): fkpath = theopath / 'fastkernel' / ('FK_%s.dat' % setname) if not fkpath.exists(): raise FKTableNotFound( - "Could not find FKTable for set '%s'. File '%s' not found" % (setname, fkpath) + "Could not find FKTable for set '{}'. File '{}' not found".format(setname, fkpath) ) cfactors = self.check_cfactor(theoryID, setname, cfac) return FKTableSpec(fkpath, cfactors) - def check_fkyaml(self, name, theoryID, cfac): - """Load a pineappl fktable - Receives a yaml file describing the fktables necessary for a given observable + def check_fk_from_theory_metadata(self, theory_metadata, theoryID, cfac=None): + """Load a pineappl fktable in the new commondata forma + Receives a theory metadata describing the fktables necessary for a given observable the theory ID and the corresponding cfactors. The cfactors should correspond directly to the fktables, the "compound folder" is not supported for pineappl theories. 
        As such, the name of the cfactor is expected to be CF_{cfactor_name}_{fktable_name}
        """
        theory = self.check_theoryID(theoryID)
-        if (theory.path / "compound").exists():
-            raise LoadFailedError(f"New theories (id=${theoryID}) do not accept compound files")
-
-        fkpath = (theory.yamldb_path / name).with_suffix(".yaml")
-        metadata, fklist = pineparser.get_yaml_information(fkpath, theory.path)
-        op = metadata["operation"]
+        fklist = theory_metadata.fktables_to_paths(theory.path / "fastkernel")
+        op = theory_metadata.operation

         if not cfac:
-            fkspecs = [FKTableSpec(i, None, metadata) for i in fklist]
+            fkspecs = [FKTableSpec(i, None, theory_metadata) for i in fklist]
             return fkspecs, op

-        operands = metadata["operands"]
         cfactors = []
-        for operand in operands:
+        for operand in theory_metadata.FK_tables:
             tmp = [self.check_cfactor(theoryID, fkname, cfac) for fkname in operand]
             cfactors.append(tuple(tmp))

-        fkspecs = [FKTableSpec(i, c, metadata) for i, c in zip(fklist, cfactors)]
+        fkspecs = [FKTableSpec(i, c, theory_metadata) for i, c in zip(fklist, cfactors)]
         return fkspecs, op

     def check_compound(self, theoryID, setname, cfac):
         thid, theopath = self.check_theoryID(theoryID)
@@ -527,24 +594,21 @@ def check_cfactor(self, theoryID, setname, cfactors):

         return tuple(cf)

-    def check_posset(self, theoryID, setname, postlambda):
-        """Load a positivity dataset"""
+    def _check_lagrange_multiplier_set(self, theoryID, setname):
+        """Check an integrability or positivity dataset"""
         cd = self.check_commondata(setname, 'DEFAULT')
         th = self.check_theoryID(theoryID)
-        if th.is_pineappl():
-            fk, _ = self.check_fkyaml(setname, theoryID, [])
-        else:
-            fk = self.check_fktable(theoryID, setname, [])
+        fk, _ = self._check_theory_old_or_new(th, cd, [])
+        return cd, fk, th
+
+    def check_posset(self, theoryID, setname, postlambda):
+        """Load a positivity dataset"""
+        cd, fk, th = self._check_lagrange_multiplier_set(theoryID, setname)
         return PositivitySetSpec(setname, cd, fk, postlambda, th)

     def check_integset(self, theoryID, setname, postlambda):
         """Load an integrability dataset"""
-        cd = self.check_commondata(setname, 'DEFAULT')
-        th = self.check_theoryID(theoryID)
-        if th.is_pineappl():
-            fk, _ = self.check_fkyaml(setname, theoryID, [])
-        else:
-            fk = self.check_fktable(theoryID, setname, [])
+        cd, fk, th = self._check_lagrange_multiplier_set(theoryID, setname)
         return IntegrabilitySetSpec(setname, cd, fk, postlambda, th)

     def get_posset(self, theoryID, setname, postlambda):
@@ -602,6 +666,30 @@ def check_default_filter_rules(self, theoryid, defaults=None):
             for inp in default_filter_rules_input()
         ]

+    def _check_theory_old_or_new(self, theoryid, commondata, cfac):
+        """Given a theory and a commondata, load the right fktable.
+        This checks whether:
+        1. the theory is a pineappl theory
+        2. the right information is available (commondata name, legacy name or theory metadata)
+        """
+        theoryno, _ = theoryid
+        if theoryid.is_pineappl():
+            if (thmeta := commondata.metadata.theory) is None:
+                # Regardless of the type of theory, request the existence of the field
+                raise TheoryMetadataNotFound(f"No theory metadata found for {commondata}")
+            fkspec, op = self.check_fk_from_theory_metadata(thmeta, theoryno, cfac)
+        else:
+            # Old theories can only be used with datasets that have a corresponding
+            # old name to map to, and so we need to be able to load the cd at this point
+            legacy_name = commondata.load().legacy_name
+            # This might be slow; if it becomes a problem, the map function can be used instead
+            try:
+                fkspec, op = self.check_compound(theoryno, legacy_name, cfac)
+            except CompoundNotFound:
+                fkspec = self.check_fktable(theoryno, legacy_name, cfac)
+                op = None
+        return fkspec, op
+
     def check_dataset(
         self,
         name,
@@ -615,6 +703,7 @@
         use_fitcommondata=False,
         fit=None,
         weight=1,
+        variant=None,
     ):
         """Loads a given dataset
         If the dataset contains new-type fktables, use the
@@ -625,19 +714,30 @@

         theoryno, _ = theoryid

+        # TODO:
+        # The dataset is checked twice, once here
+        # and once by config in produce_commondata;
+        # one of the two __must__ be superfluous.
+        # Note that both use information from dataset_input
         commondata = self.check_commondata(
-            name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit
+            name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit, variant=variant
         )

-        if theoryid.is_pineappl():
-            # If it is a pineappl theory, use the pineappl reader
-            fkspec, op = self.check_fkyaml(name, theoryno, cfac)
-        else:
+        if commondata.legacy:
+            if theoryid.is_pineappl():
+                raise LoaderError(
+                    f"Trying to use a new theory with an old commondata format, surely it must be a mistake: {name}"
+                )
+
+            # Old-format commondata that we haven't been able to translate
+            # allow only the usage of old-format theories
             try:
                 fkspec, op = self.check_compound(theoryno, name, cfac)
             except CompoundNotFound:
                 fkspec = self.check_fktable(theoryno, name, cfac)
                 op = None
+        else:
+            fkspec, op = self._check_theory_old_or_new(theoryid, commondata, cfac)

         # Note this is simply for convenience when scripting. The config will
         # construct the actual Cuts object by itself
@@ -709,12 +809,37 @@ def check_fit_cuts(self, commondata, fit):
         if not isinstance(fit, FitSpec):
             fit = self.check_fit(fit)
         _, fitpath = fit
-        p = (fitpath / 'filter') / setname / ('FKMASK_' + setname + '.dat')
-        if not p.parent.exists():
-            raise CutsNotFound(f"Bad filter configuration. Could not find {p.parent}")
-        if not p.exists():
-            p = None
-        return Cuts(commondata, p)
+
+        cuts_path = (fitpath / 'filter') / setname / ('FKMASK_' + setname + '.dat')
+
+        # After 4.0.9 we changed to a new commondata format.
+        # In order to utilize cuts from old fits in new fits it is necessary to translate the names.
+        # There are two translations that might be necessary:
+        # 1. New names in the runcard, old cuts in the 'fromfit' fit
+        # 2. Old names in the runcard, new cuts in the 'fromfit' fit
+        # In order to enforce the usage of the new names, only (1.) will be implemented
+
+        if not cuts_path.parent.exists():
+            if commondata.legacy:
+                raise CutsNotFound(f"Bad filter configuration. Could not find {cuts_path.parent}")
+
+            # Else, this is a new dataset; is there a "legacy_name" different from the new name?
+            old_name = commondata.load().legacy_name
+            if old_name == setname:
+                raise CutsNotFound(f"Bad filter configuration. Could not find {cuts_path.parent}")
+
+            # Then, check whether there are cuts with the corresponding old name
+            old_dir = cuts_path.parent.with_name(old_name)
+            if old_dir.exists():
+                cuts_path = old_dir / f"FKMASK_{old_name}.dat"
+            else:
+                raise CutsNotFound(
+                    f"Bad filter configuration. Could not find {cuts_path.parent} or {old_dir}"
+                )
+
+        if not cuts_path.exists():
+            cuts_path = None
+        return Cuts(commondata, cuts_path)

     def check_internal_cuts(self, commondata, rules):
         return InternalCutsWrapper(commondata, rules)
@@ -898,14 +1023,14 @@ def _remote_files_from_url(self, url, index, thing='files'):
             resp.raise_for_status()
         except Exception as e:
             raise RemoteLoaderError(
-                "Failed to fetch remote %s index %s: %s" % (thing, index_url, e)
+                "Failed to fetch remote {} index {}: {}".format(thing, index_url, e)
             ) from e

         try:
             info = resp.json()['files']
         except Exception as e:
             raise RemoteLoaderError(
-                "Malformed index %s. Expecting json with a key 'files': %s" % (index_url, e)
+                "Malformed index {}. Expecting json with a key 'files': {}".format(index_url, e)
             ) from e

         return {file.split('.')[0]: url + file for file in info}
@@ -976,7 +1101,7 @@ def nnpdf_pdfs(self):

     @property
     def downloadable_pdfs(self):
-        return set((*self.lhapdf_pdfs, *self.downloadable_fits, *self.nnpdf_pdfs))
+        return {*self.lhapdf_pdfs, *self.downloadable_fits, *self.nnpdf_pdfs}

     def download_fit(self, fitname):
         if not fitname in self.remote_fits:
diff --git a/validphys2/src/validphys/pineparser.py b/validphys2/src/validphys/pineparser.py
index cfe1db6e1f..eb5d6cff8f 100644
--- a/validphys2/src/validphys/pineparser.py
+++ b/validphys2/src/validphys/pineparser.py
@@ -4,46 +4,22 @@
 The FKTables for pineappl have ``pineappl.lz4`` and can be utilized
 directly with the ``pineappl`` cli as well as read with ``pineappl.fk_table``
 """
+import logging
+
 import numpy as np
 import pandas as pd

-from reportengine.compat import yaml
+from validphys.commondataparser import EXT, TheoryMeta
 from validphys.coredata import FKTableData

-########### This part might eventually be part of whatever commondata reader
-EXT = "pineappl.lz4"
-
-
-class YamlFileNotFound(FileNotFoundError):
-    """ymldb file for dataset not found."""
+log = logging.getLogger(__name__)


 class GridFileNotFound(FileNotFoundError):
     """PineAPPL file for FK table not found."""


-def _load_yaml(yaml_file):
-    """Load a dataset.yaml file.
-
-    Parameters
-    ----------
-    yaml_file : Path
-        path of the yaml file for the given dataset
-
-    Returns
-    -------
-    dict :
-        noramlized parsed file content
-    """
-    if not yaml_file.exists():
-        raise YamlFileNotFound(yaml_file)
-    ret = yaml.safe_load(yaml_file.read_text())
-    # Make sure the operations are upper-cased for compound-compatibility
-    ret["operation"] = "NULL" if ret["operation"] is None else ret["operation"].upper()
-    return ret
-
-
-def pineko_yaml(yaml_file, grids_folder, check_grid_existence=True):
+def pineko_yaml(yaml_file, grids_folder):
     """Given a yaml_file, returns the corresponding dictionary and grids.
The dictionary contains all information and we return an extra field @@ -65,20 +41,14 @@ def pineko_yaml(yaml_file, grids_folder, check_grid_existence=True): paths: list(list(path)) List (of lists) with all the grids that will need to be loaded """ - yaml_content = _load_yaml(yaml_file) - - # Turn the operands and the members into paths (and check all of them exist) - ret = [] - for operand in yaml_content["operands"]: - tmp = [] - for member in operand: - p = grids_folder / f"{member}.{EXT}" - if not p.exists() and check_grid_existence: - raise GridFileNotFound(f"Failed to find {p}") - tmp.append(p) - ret.append(tmp) - - return yaml_content, ret + # TODO: the theory metadata can be found inside the commondata metadata + # however, for the time being, pineappl tables contain this information in the `yamldb` database + # they should be 100% compatible (and if they are not there is something wrong somewhere) + # so already at this stage, use TheoryMeta parser to get the metadata for pineappl theories + # Note also that we need to use this "parser" due to the usage of the name "operands" in the yamldb + theory_meta = TheoryMeta.parser(yaml_file) + member_paths = theory_meta.fktables_to_paths(grids_folder) + return theory_meta, member_paths def pineko_apfelcomb_compatibility_flags(gridpaths, metadata): @@ -101,12 +71,7 @@ def pineko_apfelcomb_compatibility_flags(gridpaths, metadata): repetition_flag: grid_name - - shifts: - only for ATLASZPT8TEVMDIST - the points in this dataset are not contiguous so the index is shifted - shifts: - grid_name: shift_int Returns ------- @@ -119,7 +84,8 @@ def pineko_apfelcomb_compatibility_flags(gridpaths, metadata): shift: list(int) Shift in the data index for each grid that forms the fktable """ - if metadata.get("apfelcomb") is None: + apfelcomb = metadata.apfelcomb + if apfelcomb is None: return None # Can't pathlib understand double suffixes? 
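A minimal sketch of how the normalization flags above resolve in practice (the grid names and the normalization entry are hypothetical): `apfelcomb.normalization` maps fktable-member names to factors, members without an entry default to 1.0, and the result carries one factor per grid in operand order:

    operands = ["GRID_A", "GRID_B"]  # hypothetical members of one observable
    norm_info = {"GRID_A": 0.5}      # hypothetical apfelcomb.normalization content
    normalization = [norm_info.get(op, 1.0) for op in operands]
    assert normalization == [0.5, 1.0]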
@@ -127,26 +93,20 @@ def pineko_apfelcomb_compatibility_flags(gridpaths, metadata): ret = {} # Check whether we have a normalization active and whether it affects any of the grids - if metadata["apfelcomb"].get("normalization") is not None: - norm_info = metadata["apfelcomb"]["normalization"] + if apfelcomb.normalization is not None: + norm_info = apfelcomb.normalization # Now fill the operands that need normalization ret["normalization"] = [norm_info.get(op, 1.0) for op in operands] # Check whether the repetition flag is active - if metadata["apfelcomb"].get("repetition_flag") is not None: + if apfelcomb.repetition_flag is not None: if len(operands) == 1: - ret["repetition_flag"] = operands[0] in metadata["apfelcomb"]["repetition_flag"] + ret["repetition_flag"] = operands[0] in apfelcomb.repetition_flag else: # Just for the sake of it, let's check whether we did something stupid - if any(op in metadata["apfelcomb"]["repetition_flag"] for op in operands): + if any(op in apfelcomb.repetition_flag for op in operands): raise ValueError(f"The yaml info for {metadata['target_dataset']} is broken") - # Check whether the dataset has shifts - # NOTE: this only happens for ATLASZPT8TEVMDIST, if that gets fixed we might as well remove it - if metadata["apfelcomb"].get("shifts") is not None: - shift_info = metadata["apfelcomb"]["shifts"] - ret["shifts"] = [shift_info.get(op, 0) for op in operands] - return ret @@ -239,7 +199,15 @@ def pineappl_reader(fkspec): """ from pineappl.fk_table import FkTable - pines = [FkTable.read(i) for i in fkspec.fkpath] + pines = [] + for fk_path in fkspec.fkpath: + try: + pines.append(FkTable.read(fk_path)) + except BaseException as e: + # Catch absolutely any error coming from pineappl, give some info and immediately raise + log.error(f"Fatal error reading {fk_path}") + raise e + cfactors = fkspec.load_cfactors() # Extract metadata from the first grid @@ -261,6 +229,14 @@ def pineappl_reader(fkspec): apfelcomb = pineko_apfelcomb_compatibility_flags(fkspec.fkpath, fkspec.metadata) + # Process the shifts (if any), shifts is a dictionary with {fktable_name: shift_value} + # since this parser doesn't know about operations, we need to convert it to a list + # then we just iterate over the fktables and apply the shift in the right order + shifts = None + if (shift_info := fkspec.metadata.shifts) is not None: + fknames = [i.name.replace(f".{EXT}", "") for i in fkspec.fkpath] + shifts = [shift_info.get(fname, 0) for fname in fknames] + # fktables in pineapplgrid are for obs = fk * f while previous fktables were obs = fk * xf # prepare the grid all tables will be divided by if hadronic: @@ -289,8 +265,9 @@ def pineappl_reader(fkspec): raw_fktable = raw_fktable[0:1] n = 1 protected = True - if apfelcomb.get("shifts") is not None: - ndata += apfelcomb["shifts"][i] + + if shifts is not None: + ndata += shifts[i] # Add empty points to ensure that all fktables share the same x-grid upon convolution missing_x_points = np.setdiff1d(xgrid, p.x_grid(), assume_unique=True) @@ -300,7 +277,7 @@ def pineappl_reader(fkspec): if hadronic: raw_fktable = np.insert(raw_fktable, miss_index, 0.0, axis=3) # Check conversion factors and remove the x* from the fktable - raw_fktable *= fkspec.metadata.get("conversion_factor", 1.0) / xdivision + raw_fktable *= fkspec.metadata.conversion_factor / xdivision # Create the multi-index for the dataframe # for optimized pineappls different grids can potentially have different indices @@ -322,6 +299,13 @@ def pineappl_reader(fkspec): # Finallly concatenate 
all fktables, sort by flavours and fill any holes sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0) + # Check whether this is a 1-point normalization fktable and, if that's the case, protect! + if fkspec.metadata.operation == "RATIO" and ndata == 1 and len(pines) == 1: + # it _might_ be, check whether it is the divisor fktable + divisor = fkspec.metadata.FK_tables[-1][0] + name = fkspec.fkpath[0].name.replace(f".{EXT}", "") + protected = divisor == name + return FKTableData( sigma=sigma, ndata=ndata, diff --git a/validphys2/src/validphys/plotoptions/__init__.py b/validphys2/src/validphys/plotoptions/__init__.py index ed3c59a2b7..e69de29bb2 100644 --- a/validphys2/src/validphys/plotoptions/__init__.py +++ b/validphys2/src/validphys/plotoptions/__init__.py @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Mar 11 19:27:44 2016 - -@author: Zahari Kassabov -""" - -from validphys.plotoptions.core import ( # analysis:ignore - get_info, - get_xq2map, - kitable, - transform_result, -) diff --git a/validphys2/src/validphys/plotoptions/core.py b/validphys2/src/validphys/plotoptions/core.py index 941da091c8..0233c7a825 100644 --- a/validphys2/src/validphys/plotoptions/core.py +++ b/validphys2/src/validphys/plotoptions/core.py @@ -1,9 +1,3 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Mar 11 19:27:44 2016 - -@author: Zahari Kassabov -""" import dataclasses import enum import logging @@ -12,28 +6,17 @@ import numpy as np import pandas as pd -from validobj import ValidationError -from reportengine.compat import yaml from reportengine.floatformatting import format_number -from reportengine.utils import ChainMap, get_functions -from validphys.core import CommonDataSpec, Cuts, DataSetSpec, InternalCutsWrapper +from reportengine.utils import ChainMap +from validphys.core import CommonDataSpec, DataSetSpec from validphys.coredata import CommonData -from validphys.plotoptions import kintransforms, labelers, resulttransforms -from validphys.plotoptions.utils import apply_to_all_columns, get_subclasses +from validphys.plotoptions.plottingoptions import PlottingOptions, default_labels, labeler_functions +from validphys.plotoptions.utils import apply_to_all_columns from validphys.utils import parse_yaml_inp log = logging.getLogger(__name__) -default_labels = ('idat', 'k1', 'k2', 'k3') - -labeler_functions = get_functions(labelers) -transform_functions = get_subclasses(kintransforms, kintransforms.Kintransform) -result_functions = get_functions(resulttransforms) - -ResultTransformations = enum.Enum('ResultTransformations', list(result_functions.keys())) -TransformFunctions = enum.Enum('TransformFunctions', list(transform_functions.keys())) - def get_info(data, *, normalize=False, cuts=None, use_plotfiles=True): """Retrieve and process the plotting information for the input data (which could @@ -89,6 +72,7 @@ def __init__( x_label=None, x_scale=None, y_scale=None, + ds_metadata=None, process_description='-', nnpdf31_process, **kwargs, @@ -113,6 +97,8 @@ def __init__( self.y_scale = y_scale self.dataset_label = dataset_label self.process_description = process_description + # Metadata of the dataset + self.ds_metadata = ds_metadata def name_to_label(self, name): if name in labeler_functions: @@ -124,6 +110,10 @@ def name_to_label(self, name): return name return self.kinlabels[ix] + @property + def process_type(self): + return self.ds_metadata.process_type + @property def xlabel(self): if self._x_label is not None: @@ -144,33 +134,59 @@ def group_label(self, same_vals, groupby): 
return f'({same_vals[0]})' pieces = [] for column, val in zip(groupby, same_vals): - label = self.name_to_label(column) - if isinstance(val, numbers.Real): - val = format_number(val) - pieces.append('%s = %s' % (label, val)) + if ( + self.ds_metadata is not None + and not self.ds_metadata.is_ported_dataset + and column in ('k1', 'k2', 'k3') + ): + # If this is a new-style commondata (it has metadata) + # _and_ it is not simply an automatic port of the old dataset + # _and_ we have the information on the requested column... + # then we can have a nicer label! + ix = ('k1', 'k2', 'k3').index(column) + var_key = self.ds_metadata.kinematic_coverage[ix] + pieces.append(self.ds_metadata.kinematics.apply_label(var_key, val)) + else: + label = self.name_to_label(column) + if isinstance(val, numbers.Real): + val = format_number(val) + pieces.append('{} = {}'.format(label, val)) + return '%s' % ' '.join(pieces) @classmethod def from_commondata(cls, commondata, cuts=None, normalize=False): plot_params = ChainMap() - if commondata.plotfiles: - for file in commondata.plotfiles: - with open(file) as f: - processed_input = yaml.round_trip_load(f) - pf = parse_yaml_inp(processed_input, PlottingFile, file) + kinlabels = commondata.plot_kinlabels + + if commondata.legacy: + if commondata.plotfiles: + for file in commondata.plotfiles: + pf = parse_yaml_inp(file, PlottingFile) config_params = dataclasses.asdict(pf, dict_factory=dict_factory) - plot_params = plot_params.new_child(config_params) - if normalize and 'normalize' in plot_params: - plot_params = plot_params.new_child(config_params['normalize']) - if 'dataset_label' not in plot_params: - log.warning(f"'dataset_label' key not found in {file}") - plot_params['dataset_label'] = commondata.name + plot_params = plot_params.new_child(config_params) + if normalize and 'normalize' in plot_params: + plot_params = plot_params.new_child(config_params['normalize']) + if 'dataset_label' not in plot_params: + log.warning(f"'dataset_label' key not found in {file}") + plot_params['dataset_label'] = commondata.name + + else: + plot_params = {'dataset_label': commondata.name} else: - plot_params = {'dataset_label': commondata.name} + pcd = commondata.metadata.plotting_options + config_params = dataclasses.asdict(pcd, dict_factory=dict_factory) + plot_params = plot_params.new_child(config_params) + # Add a reference to the metadata to the plot_params so that it is stored in PlotInfo + plot_params["ds_metadata"] = commondata.metadata + # If normalize, we need to update some of the parameters + if normalize and pcd.normalize is not None: + plot_params = plot_params.new_child(pcd.normalize) - kinlabels = commondata.plot_kinlabels kinlabels = plot_params['kinematics_override'].new_labels(*kinlabels) + plot_params["process_type"] = commondata.metadata.process_type + if "extra_labels" in plot_params and cuts is not None: cut_extra_labels = { k: [v[i] for i in cuts] for k, v in plot_params["extra_labels"].items() @@ -202,76 +218,6 @@ class KinLabel(enum.Enum): k3 = enum.auto() -class Scale(enum.Enum): - linear = enum.auto() - log = enum.auto() - symlog = enum.auto() - - -@dataclasses.dataclass -class PlottingOptions: - func_labels: dict = dataclasses.field(default_factory=dict) - dataset_label: typing.Optional[str] = None - experiment: typing.Optional[str] = None - nnpdf31_process: typing.Optional[str] = None - data_reference: typing.Optional[str] = None - theory_reference: typing.Optional[str] = None - process_description: typing.Optional[str] = None - y_label: 
typing.Optional[str] = None
-    x_label: typing.Optional[str] = None
-
-    kinematics_override: typing.Optional[TransformFunctions] = None
-
-    result_transform: typing.Optional[ResultTransformations] = None
-
-    # TODO: change this to x: typing.Optional[KinLabel] = None
-    # but this currently fails CI because some datasets have
-    # a kinlabel of $x_1$ or " "!!
-    x: typing.Optional[str] = None
-
-    x_scale: typing.Optional[Scale] = None
-    y_scale: typing.Optional[Scale] = None
-
-    line_by: typing.Optional[list] = None
-    figure_by: typing.Optional[list] = None
-
-    extra_labels: typing.Optional[typing.Mapping[str, typing.List]] = None
-
-    def parse_figure_by(self):
-        if self.figure_by is not None:
-            for el in self.figure_by:
-                if el in labeler_functions:
-                    self.func_labels[el] = labeler_functions[el]
-
-    def parse_line_by(self):
-        if self.line_by is not None:
-            for el in self.line_by:
-                if el in labeler_functions:
-                    self.func_labels[el] = labeler_functions[el]
-
-    def parse_x(self):
-        if self.x is not None and self.x not in self.all_labels:
-            raise ValidationError(
-                f"The label {self.x} is not in the set of known labels {self.all_labels}"
-            )
-
-    @property
-    def all_labels(self):
-        if self.extra_labels is None:
-            return set(default_labels)
-        return set(self.extra_labels.keys()).union(set(default_labels))
-
-    def __post_init__(self):
-        if self.kinematics_override is not None:
-            self.kinematics_override = transform_functions[self.kinematics_override.name]()
-        if self.result_transform is not None:
-            self.result_transform = result_functions[self.result_transform.name]
-
-        self.parse_figure_by()
-        self.parse_line_by()
-        self.parse_x()
-
-
 @dataclasses.dataclass
 class PlottingFile(PlottingOptions):
     normalize: typing.Optional[PlottingOptions] = None
@@ -309,6 +255,7 @@ def kitable(data, info, *, cuts=None):
     if isinstance(data, CommonData) and cuts is not None:
         table = table.loc[cuts.load()]
     table.index.name = default_labels[0]
+
     if info.kinematics_override:
         transform = apply_to_all_columns(table, info.kinematics_override)
         table = pd.DataFrame(np.array(transform).T, columns=table.columns, index=table.index)
@@ -330,7 +277,6 @@ def kitable(data, info, *, cuts=None):
         table[label] = value

     nreal_labels = len(table.columns)
-
     for label, func in funcs:
         # Pass only the "real" labels and not the derived functions
         table[label] = apply_to_all_columns(table.iloc[:, :nreal_labels], func)
@@ -351,6 +297,11 @@ def transform_result(cv, error, kintable, info):

 def get_xq2map(kintable, info):
     """Return a tuple of (x,Q²) from the kinematic values defined in kitable
-    (usually obtained by calling ``kitable``) using machinery specified in
-    ``info``"""
-    return apply_to_all_columns(kintable, info.kinematics_override.xq2map)
+    (usually obtained by calling ``kitable``) using the process type if available.
+
+    Otherwise it will fall back to the legacy mode, i.e., using the machinery specified in ``info``.
+    """
+    try:
+        return info.process_type.xq2map(kintable, info.ds_metadata)
+    except AttributeError:
+        return apply_to_all_columns(kintable, info.kinematics_override.xq2map)
diff --git a/validphys2/src/validphys/plotoptions/kintransforms.py b/validphys2/src/validphys/plotoptions/kintransforms.py
index 5ba7878ea3..a37ae6f6ec 100644
--- a/validphys2/src/validphys/plotoptions/kintransforms.py
+++ b/validphys2/src/validphys/plotoptions/kintransforms.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """

 kintransforms.py

@@ -13,18 +12,24 @@

 ..
code-block:: python class mytransform: - def __call__(self, k1:np.array,k2:np.array,k3:np.array) -> (np.array, np.array, np.array): - #Transform kinematics + def __call__( + self, k1: np.array, k2: np.array, k3: np.array + ) -> (np.array, np.array, np.array): + # Transform kinematics ... return trasformed_k1, transformed_k2, transformed_k3 - def new_labels(self, old_label1:str, old_label2:str, old_label3:str) -> (str, str, str): - #Transform labels + def new_labels( + self, old_label1: str, old_label2: str, old_label3: str + ) -> (str, str, str): + # Transform labels return transformed_label1, transformed_label2, transformed_label3 - #Using as input the result of __call__ as well as any new labels - def xq2map(self, k1:np.array,k2:np.array,k3:np.array,**extra_labels) -> (np.array, np.array): - #calculate (x,Q²) + # Using as input the result of __call__ as well as any new labels + def xq2map( + self, k1: np.array, k2: np.array, k3: np.array, **extra_labels + ) -> (np.array, np.array): + # calculate (x,Q²) return x, Q2 @@ -32,29 +37,31 @@ def xq2map(self, k1:np.array,k2:np.array,k3:np.array,**extra_labels) -> (np.arra .. code-block:: python - {'DIS': ('$x$', '$Q^2 (GeV^2)$', '$y$'), - 'DYP': ('$y$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_JPT': ('$p_T (GeV)$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_JRAP': ('$\\eta/y$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_MLL': ('$M_{ll} (GeV)$', '$M_{ll}^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_PT': ('$p_T (GeV)$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_PTRAP': ('$\\eta/y$', '$p_T^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWJ_RAP': ('$\\eta/y$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWK_MLL': ('$M_{ll} (GeV)$', '$M_{ll}^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWK_PT': ('$p_T$ (GeV)', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWK_PTRAP': ('$\\eta/y$', '$p_T^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'EWK_RAP': ('$\\eta/y$', '$M^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HIG_RAP': ('$y$', '$M_H^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HQP_MQQ': ('$M^{QQ} (GeV)$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HQP_PTQ': ('$p_T^Q (GeV)$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HQP_PTQQ': ('$p_T^{QQ} (GeV)$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HQP_YQ': ('$y^Q$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'HQP_YQQ': ('$y^{QQ}$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'INC': ('$0$', '$\\mu^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'JET': ('$\\eta$', '$p_T^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'DIJET': ('$\\eta$', '$m_{12} (GeV)$', '$\\sqrt{s} (GeV)$'), - 'PHT': ('$\\eta_\\gamma$', '$E_{T,\\gamma}^2 (GeV^2)$', '$\\sqrt{s} (GeV)$'), - 'SIA': ('$z$', '$Q^2 (GeV^2)$', '$y$')} + { + "DIS": ("$x$", "$Q^2 (GeV^2)$", "$y$"), + "DYP": ("$y$", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_JPT": ("$p_T$ (GeV)", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_JRAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_MLL": ("$M_{ll}$ (GeV)", "$M_{ll}^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_PT": ("$p_T$ (GeV)", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_PTRAP": ("$\\eta/y$", "$p_T^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWJ_RAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWK_MLL": ("$M_{ll}$ (GeV)", "$M_{ll}^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWK_PT": ("$p_T$ (GeV)", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWK_PTRAP": ("$\\eta/y$", "$p_T^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "EWK_RAP": ("$\\eta/y$", "$M^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HIG_RAP": ("$y$", "$M_H^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HQP_MQQ": ("$M^{QQ}$ 
(GeV)", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HQP_PTQ": ("$p_T^Q$ (GeV)", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HQP_PTQQ": ("$p_T^{QQ}$ (GeV)", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HQP_YQ": ("$y^Q$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "HQP_YQQ": ("$y^{QQ}$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "INC": ("$0$", "$\\mu^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "JET": ("$\\eta$", "$p_T^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "DIJET": ("$\\eta$", "$m_{12}$ (GeV)", "$\\sqrt{s}$ (GeV)"), + "PHT": ("$\\eta_\\gamma$", "$E_{T,\\gamma}^2 (GeV^2)$", "$\\sqrt{s}$ (GeV)"), + "SIA": ("$z$", "$Q^2 (GeV^2)$", "$y$"), + } @@ -69,6 +76,32 @@ def xq2map(self, k1:np.array,k2:np.array,k3:np.array,**extra_labels) -> (np.arra import numpy as np +# TODO +class identity: + """The identity transformation is a transitional operation for the implementation + of the new commondata + + It takes the kinematics as they come and leave them unchanged. + The final version will either + 1. Remove the necessitity for the key in the PlotOptions + 2. Change all "identities" to utilize one of the transformations + The selection depends on the final decision on the x-q2 map and the possibilities are + either: + 1. Include the xq2 information as part of the kinematics when no override is provided + 2. Enforce that all datasets include an override to define how the x-q2 map should be computed. + """ + + def __call__(self, k1, k2, k3): + return k1, k2, k3 + + def new_labels(self, k1, k2, k3): + return k1, k2, k3 + + def xq2map(self, k1, k2, k3, **extra_labels): + # This is going to be a problem + return k1, k2 + + class Kintransform(metaclass=abc.ABCMeta): @classmethod def __subclasshook__(cls, other): @@ -202,17 +235,17 @@ def xq2map(self, k1, k2, k3, **extra_labels): class dyp_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class jet_sqrt_scale(SqrtScaleMixin, JETXQ2MapMixin): def new_labels(self, *old_labels): - return ('$|y|$', '$p_T$ (GeV)', r'$\sqrt{s} (GeV)$') + return ('$|y|$', '$p_T$ (GeV)', r'$\sqrt{s}$ (GeV)') class dijet_sqrt_scale(SqrtScaleMixin, DIJETXQ2MapMixin): def new_labels(self, *old_labels): - return ('$|y|$', '$m_{12}$ (GeV)', r'$\sqrt{s} (GeV)$') + return ('$|y|$', '$m_{12}$ (GeV)', r'$\sqrt{s}$ (GeV)') class dijet_sqrt_scale_ATLAS(SqrtScaleMixin, DIJETATLASXQ2MapMixin): @@ -220,7 +253,7 @@ def __call__(self, k1, k2, k3): return k1, k2, k3 def new_labels(self, *old_labels): - return ('$|y^*|$', '$m_{12}$ (GeV)', r'$\sqrt{s} (GeV)$') + return ('$|y^*|$', '$m_{12}$ (GeV)', r'$\sqrt{s}$ (GeV)') class dijet_CMS_3D(SqrtScaleMixin, DIJET3DXQ2MapMixin): @@ -230,7 +263,7 @@ def new_labels(self, *old_labels): class dijet_CMS_5TEV(SqrtScaleMixin, DIJET3DXQ2MapMixin): def new_labels(self, *old_labels): - return ('$\eta_{dijet}$', '$p_{T,avg}$ (GeV)', r'$\sqrt{s} (GeV)$') + return (r'$\eta_{dijet}$', '$p_{T,avg}$ (GeV)', r'$\sqrt{s}$ (GeV)') class dis_sqrt_scale(DISXQ2MapMixin): @@ -239,93 +272,93 @@ def __call__(self, k1, k2, k3): return k1, np.sqrt(k2), np.ceil(ecm) def new_labels(self, *old_labels): - return ('$x$', '$Q$ (GeV)', r'$\sqrt{s} (GeV)$') + return ('$x$', '$Q$ (GeV)', r'$\sqrt{s}$ (GeV)') class ewj_jpt_sqrt_scale(SqrtScaleMixin, EWPTXQ2MapMixin): # okay but it does not exist - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class ewj_jrap_sqrt_scale( SqrtScaleMixin, DYXQ2MapMixin ): # EWJ_JRAP->DY ----> okay but it does not exist - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class ewj_mll_sqrt_scale( SqrtScaleMixin, DYMXQ2MapMixin ): # 
EWJ_MLL->DYm ----> okay but it does not exist - qlabel = '$M_{ll} (GeV)$' + qlabel = '$M_{ll}$ (GeV)' class ewj_pt_sqrt_scale( SqrtScaleMixin, EWPTXQ2MapMixin ): # EWJ_PT->DY ----> Zpt, okay but it does not exist - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class ewj_ptrap_sqrt_scale( SqrtScaleMixin, DYXQ2MapMixin ): # EWJ_PTRAP -> DY okay, but it does not exist - qlabel = r'$p_T (GeV)$' + qlabel = r'$p_T$ (GeV)' class ewj_rap_sqrt_scale( SqrtScaleMixin, DYXQ2MapMixin ): # EWJ_RAP -> DY okay (can we get rid of it also in commondata?) - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class ewk_mll_sqrt_scale(SqrtScaleMixin, DYMXQ2MapMixin): # EWK_MLL -> DYM okay - qlabel = '$M_{ll} (GeV)$' + qlabel = '$M_{ll}$ (GeV)' class ewk_pt_sqrt_scale(SqrtScaleMixin, EWPTXQ2MapMixin): # EWK_PT -> Zpt okay - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class ewk_ptrap_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): # EWK_PT -> DY okay - qlabel = r'$p_T (GeV)$' + qlabel = r'$p_T$ (GeV)' class ewk_rap_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): # EWK_RAP -> DY okay - qlabel = '$M (GeV)$' + qlabel = '$M$ (GeV)' class hig_rap_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): # okay, but it does not exist - qlabel = '$M_H (GeV)$' + qlabel = '$M_H$ (GeV)' class hqp_mqq_sqrt_scale(SqrtScaleMixin, DYMXQ2MapMixin): # HQP_MQQ -> DYM okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class hqp_ptq_sqrt_scale(SqrtScaleMixin, HQPTXQ2MapMixin): # HQP_PTQ -> HQPT okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class hqp_ptqq_sqrt_scale(SqrtScaleMixin, HQQPTXQ2MapMixin): # HQP_PTQQ -> HQQPT okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class hqp_yq_sqrt_scale(SqrtScaleMixin, JETXQ2MapMixin): # HQP_YQ->JETXQ2 okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class hqp_yqq_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): # HQP_YQQ->DYXQ2 okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class inc_sqrt_scale(SqrtScaleMixin, DYMXQ2MapMixin): # INC -> DYM okay - qlabel = r'$\mu (GeV)$' + qlabel = r'$\mu$ (GeV)' class pht_sqrt_scale(SqrtScaleMixin, DYXQ2MapMixin): # okay but not in commondata - qlabel = r'$E_{T,\gamma} (GeV)$' + qlabel = r'$E_{T,\gamma}$ (GeV)' class sia_sqrt_scale(SqrtScaleMixin, DISXQ2MapMixin): # okay but not in commondata - qlabel = '$Q (GeV)$' + qlabel = '$Q$ (GeV)' class nmc_process(DISXQ2MapMixin): @@ -354,7 +387,7 @@ def __call__(self, k1, k2, k3): return k1, np.sqrt(k2), np.ceil(ecm) def new_labels(self, *old_labels): - return ('$x$', '$Q$ (GeV)', r'$\sqrt{s} (GeV)$') + return ('$x$', '$Q$ (GeV)', r'$\sqrt{s}$ (GeV)') class ewk_pseudorapity_sqrt_scale(ewk_rap_sqrt_scale): diff --git a/validphys2/src/validphys/plotoptions/plottingoptions.py b/validphys2/src/validphys/plotoptions/plottingoptions.py new file mode 100644 index 0000000000..72fd616db7 --- /dev/null +++ b/validphys2/src/validphys/plotoptions/plottingoptions.py @@ -0,0 +1,112 @@ +""" +This module is separated from other plotoptions modules to avoid circular dependencies + +The class PlottingOptions is used by the commondata reader to check that the plotting options +set in `plotting` are acceptable. 
+""" + +import dataclasses +import enum +import typing + +from validobj import ValidationError + +from reportengine.utils import get_functions +from validphys.plotoptions import kintransforms, labelers, resulttransforms +from validphys.plotoptions.utils import get_subclasses + +default_labels = ('idat', 'k1', 'k2', 'k3') + + +labeler_functions = get_functions(labelers) +transform_functions = get_subclasses(kintransforms, kintransforms.Kintransform) +result_functions = get_functions(resulttransforms) + + +ResultTransformations = enum.Enum('ResultTransformations', list(result_functions.keys())) +TransformFunctions = enum.Enum('TransformFunctions', list(transform_functions.keys())) + + +class Scale(enum.Enum): + linear = enum.auto() + log = enum.auto() + symlog = enum.auto() + + +@dataclasses.dataclass +class PlottingOptions: + func_labels: dict = dataclasses.field(default_factory=dict) + dataset_label: typing.Optional[str] = None + experiment: typing.Optional[str] = None + nnpdf31_process: typing.Optional[str] = None + data_reference: typing.Optional[str] = None + theory_reference: typing.Optional[str] = None + process_description: typing.Optional[str] = None + y_label: typing.Optional[str] = None + x_label: typing.Optional[str] = None + + kinematics_override: typing.Optional[TransformFunctions] = None + + result_transform: typing.Optional[ResultTransformations] = None + + # TODO: change this to x: typing.Optional[KinLabel] = None + # but this currently fails CI because some datasets have + # a kinlabel of $x_1$ or " "!! + x: typing.Optional[str] = None + + # TODO: the old commondata uses x, the new uses plot_x + # the old commondata only allowed the x to be k1, k2, k3 or idat + plot_x: typing.Optional[str] = None + + x_scale: typing.Optional[Scale] = None + y_scale: typing.Optional[Scale] = None + + line_by: typing.Optional[list] = None + figure_by: typing.Optional[list] = None + + extra_labels: typing.Optional[typing.Mapping[str, typing.List]] = None + + # TODO: the old commondata saved this normalize key in a different way + # need to check it is equivalent in all dataset it appears before merging + normalize: typing.Optional[dict] = None + + # The new commondata files might need to change some variables inside the plotting + # avoid doing it twice + already_digested: typing.Optional[bool] = False + + def parse_figure_by(self): + if self.figure_by is not None: + for el in self.figure_by: + if el in labeler_functions: + self.func_labels[el] = labeler_functions[el] + + def parse_line_by(self): + if self.line_by is not None: + for el in self.line_by: + if el in labeler_functions: + self.func_labels[el] = labeler_functions[el] + + def parse_x(self): + if self.x is not None and self.x not in self.all_labels: + raise ValidationError( + f"The label {self.x} is not in the set of known labels {self.all_labels}" + ) + + @property + def all_labels(self): + if self.extra_labels is None: + return set(default_labels) + return set(self.extra_labels.keys()).union(set(default_labels)) + + def __post_init__(self): + if self.kinematics_override is not None: + self.kinematics_override = transform_functions[self.kinematics_override.name]() + if self.result_transform is not None: + self.result_transform = result_functions[self.result_transform.name] + + # TODO: + # add a deprecation warning in case anyone try to use `x` in the new commondata + + self.parse_figure_by() + self.parse_line_by() + self.parse_x() diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py 
new file mode 100644
index 0000000000..dfba77a6cf
--- /dev/null
+++ b/validphys2/src/validphys/process_options.py
@@ -0,0 +1,258 @@
+"""
+    Module to hold process dependent options
+
+    Only variables included in the ``_Vars`` class and processes included in the ``PROCESSES`` dictionary are allowed.
+"""
+import dataclasses
+from typing import Callable, Optional, Tuple, Union
+
+import numpy as np
+from validobj.custom import Parser
+
+TMASS = 173.3
+
+
+class _Vars:
+    x = "x"
+    Q2 = "Q2"
+    Q = "Q"
+    y = "y"
+    pT = "pT"
+    ET = "ET"
+    sqrts = "sqrts"
+    ystar = "ystar"
+    ydiff = "ydiff"
+    m_jj = "m_jj"
+    p_T2 = "p_T2"
+    y_t = "y_t"
+    y_ttBar = "y_ttBar"
+    m_t2 = "m_t2"
+    pT_t = "pT_t"
+    m_ttBar = "m_ttBar"
+
+
+def _map_to_metadata(kin_df, metadata):
+    """Read the three-column dataframe corresponding to the values set in the
+    ``kinematic_coverage`` field into a dictionary defining the name of the variables.
+    Adds the special "sqrts" key unless it is already part of the kinematic coverage.
+    """
+    kin_cov = metadata.kinematic_coverage
+    kins = {}
+    for kin_lab, kin_values in zip(kin_cov, kin_df.values.T):
+        kins[kin_lab] = kin_values
+
+    if "sqrts" not in kin_cov:
+        kins[_Vars.sqrts] = metadata.cm_energy
+
+    return kins
+
+
+def _get_or_fail(kin_dict, list_of_accepted):
+    """Loop over the list of accepted variables to check whether one of them is included
+    in kin_dict, otherwise fail"""
+    for var in list_of_accepted:
+        if var in kin_dict:
+            return kin_dict[var]
+    raise KeyError(f"Need one of the following variables {list_of_accepted} to continue")
+
+
+@dataclasses.dataclass(frozen=True)
+class _Process:
+    name: str
+    description: str
+    accepted_variables: Tuple[str]
+    xq2map_function: Optional[Callable] = None
+
+    def __hash__(self):
+        return hash(self.name)
+
+    def same_kin_variables(self, kin_cov):
+        """Check whether the kinematic variables from the kinematic coverage are
+        a subset of the accepted variables."""
+        # Accepting in any case the legacy variables
+        if kin_cov == ["k1", "k2", "k3"]:
+            return True
+        # We check if kin_cov is a subset of self.accepted_variables
+        return set(self.accepted_variables).union(set(kin_cov)) == set(self.accepted_variables)
+
+    def xq2map(self, kin_df, metadata):
+        """Transform the kinematics dataframe into an (x, Q2) dataframe.
+        For double hadronic processes the number of output points will be twice the input.
+        These functions should have access to both the kinematic dataframe and the
+        metadata of the commondata.
+        """
+        # Remove ``extra labels`` from kin_df
+        if metadata.plotting.extra_labels is not None:
+            for extra_label in metadata.plotting.extra_labels:
+                kin_df = kin_df.drop(columns=extra_label)
+
+        # Check whether the kinematic variables defined in metadata correspond to the
+        # accepted variables
+        if not self.same_kin_variables(metadata.kinematic_coverage):
+            raise NotImplementedError(
+                f"kinematic variables are not supported for process {self.name}.
You are using {metadata.kinematic_coverage}, please use {self.accepted_variables} ({metadata.name})" + ) + + if self.xq2map_function is None: + raise NotImplementedError(f"xq2map is not implemented for {self.name}") + + # check that all variables in the dataframe are accepted by this process + try: + return self.xq2map_function(_map_to_metadata(kin_df, metadata)) + except KeyError as e: + raise NotImplementedError( + f"Error trying to compute xq2map for process {self.name} ({metadata.name})" + ) from e + + def __str__(self): + return self.name + + +def _dis_xq2map(kin_dict): + """In the old style commondata, the variables in the dataframe were ``x, Q2, y`` + but due to the transformations that happen inside validphys they become ``x, Q, y`` + """ + x = kin_dict["k1"] + q = kin_dict["k2"] + return x, q * q + + +def _jets_xq2map(kin_dict): + # Then compute x, Q2 + pT = kin_dict[_Vars.pT] + ratio = pT / kin_dict[_Vars.sqrts] + x1 = 2 * ratio * np.exp(kin_dict[_Vars.y]) + x2 = 2 * ratio * np.exp(-kin_dict[_Vars.y]) + q2 = pT * pT + x = np.concatenate((x1, x2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + + +def _dijets_xq2map(kin_dict): + # Here we can have either ystar or ydiff, but in either case we need to do the same + ylab = _get_or_fail(kin_dict, [_Vars.ystar, _Vars.ydiff]) + # Then compute x, Q2 + ratio = kin_dict[_Vars.m_jj] / kin_dict[_Vars.sqrts] + x1 = ratio * np.exp(ylab) + x2 = ratio * np.exp(-ylab) + q2 = kin_dict[_Vars.m_jj] * kin_dict[_Vars.m_jj] + x = np.concatenate((x1, x2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + + +def _hqp_yq_xq2map(kin_dict): + # Compute x, Q2 + if {"k1", "k2", "k3"} <= kin_dict.keys(): + kin_dict[_Vars.y_t] = kin_dict["k1"] + kin_dict[_Vars.m_t2] = kin_dict["k2"] + kin_dict[_Vars.sqrts] = kin_dict["k3"] + + mass2 = _get_or_fail(kin_dict, [_Vars.m_t2, _Vars.m_ttBar]) + + ratio = np.sqrt(mass2) / kin_dict[_Vars.sqrts] + x1 = ratio * np.exp(kin_dict[_Vars.y_t]) + x2 = ratio * np.exp(-kin_dict[_Vars.y_t]) + q2 = mass2 + x = np.concatenate((x1, x2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + + +def _hqp_yqq_xq2map(kin_dict): + # Compute x, Q2 + ratio = np.sqrt(kin_dict[_Vars.m_t2]) / kin_dict[_Vars.sqrts] + x1 = ratio * np.exp(kin_dict[_Vars.y_ttBar]) + x2 = ratio * np.exp(-kin_dict[_Vars.y_ttBar]) + q2 = kin_dict[_Vars.m_t2] + x = np.concatenate((x1, x2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + + +def _hqp_ptq_xq2map(kin_dict): + # Compute x, Q2 + QMASS2 = TMASS * TMASS + Q = np.sqrt(QMASS2 + kin_dict[_Vars.pT_t] * kin_dict[_Vars.pT_t]) + kin_dict[_Vars.pT_t] + return Q / kin_dict[_Vars.sqrts], Q * Q + + +def _displusjet_xq2map(kin_dict): + """Computes x and q2 mapping for a DIS + J (J) process + Uses Q2 as provided by the dictionary of kinematics variables + and x = Q**4 / s / (pt**2 - Q**2) + """ + q2 = kin_dict[_Vars.Q2] + # Consider ET and pT as equivalent for the purposes of the xq2 plot + pt = _get_or_fail(kin_dict, [_Vars.ET, _Vars.pT]) + s = kin_dict[_Vars.sqrts] ** 2 + x = q2 * q2 / s / (pt**2 - q2) + return x, q2 + + +DIS = _Process( + "DIS", + "Deep Inelastic Scattering", + accepted_variables=(_Vars.x, _Vars.Q2, _Vars.y, _Vars.Q), + xq2map_function=_dis_xq2map, +) + +JET = _Process( + "JET", + "Single Jet production", + accepted_variables=(_Vars.y, _Vars.pT, _Vars.sqrts, _Vars.p_T2), + xq2map_function=_jets_xq2map, +) + +DIJET = _Process( + "DIJET", + "DiJets Production", + accepted_variables=(_Vars.ystar, 
_Vars.m_jj, _Vars.sqrts, _Vars.ydiff), + xq2map_function=_dijets_xq2map, +) + +HQP_YQ = _Process( + "HQP_YQ", + "Normalized differential cross section w.r.t. absolute rapidity of t", + accepted_variables=(_Vars.y_t, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), + xq2map_function=_hqp_yq_xq2map, +) + +HQP_YQQ = _Process( + "HQP_YQQ", + "Differential cross section w.r.t. absolute rapidity of ttBar", + accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts), + xq2map_function=_hqp_yqq_xq2map, +) + +HQP_PTQ = _Process( + "HQP_PTQ", + "Normalized double differential cross section w.r.t. absolute rapidity and transverse momentum of t", + accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts), + xq2map_function=_hqp_ptq_xq2map, +) + + +HERAJET = _Process( + "HERAJET", + "DIS + j production", + accepted_variables=(_Vars.pT, _Vars.Q2, _Vars.sqrts, _Vars.ET), + xq2map_function=_displusjet_xq2map, +) + + +PROCESSES = { + "DIS": DIS, + "DIS_NC": dataclasses.replace(DIS, name="DIS_NC"), + "DIS_CC": dataclasses.replace(DIS, name="DIS_CC"), + "DIS_NCE": dataclasses.replace(DIS, name="DIS_NCE"), + "JET": JET, + "DIJET": DIJET, + "HQP_YQ": HQP_YQ, + "HQP_YQQ": HQP_YQQ, + "HQP_PTQ": HQP_PTQ, + "HERAJET": HERAJET, + "HERADIJET": dataclasses.replace(HERAJET, name="HERADIJET", description="DIS + jj production"), +} + + +@Parser +def ValidProcess(process_name) -> Union[_Process, str]: + return PROCESSES.get(process_name.upper(), process_name.upper()) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 9c7d21244a..1551ae75d8 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Tools to obtain and analyse the pseudodata that was seen by the neural networks during the fitting. @@ -16,6 +15,7 @@ dataset_inputs_covmat_from_systematics, sqrt_covmat, ) +from validphys.datafiles import legacy_to_new_map FILE_PREFIX = "datacuts_theory_fitting_" @@ -101,8 +101,21 @@ def read_replica_pseudodata(fit, context_index, replica): tr["type"], val["type"] = "training", "validation" pseudodata = pd.concat((tr, val)) - pseudodata.sort_index(level=range(1, 3), inplace=True) + # For this function to work also with old fits, it is necessary to remap the names + # being read (since the names in the context have already been remapped). + # The following checks whether a given name from the fit is also present in the context + # and, if not, tries to get it from the legacy-to-new mapping.
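+ # (Illustrative note, not part of the original patch: e.g. a replica stored in an old fit under the legacy name "NMC" would be remapped here to the new name that legacy_to_new_map returns for it.)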
+ mapping = {} + context_datasets = context_index.get_level_values("dataset").unique() + for dsname in pseudodata.index.get_level_values("dataset").unique(): + if dsname not in context_datasets: + new_name, _ = legacy_to_new_map(dsname) + mapping[dsname] = new_name + + pseudodata.rename(mapping, level=1, inplace=True) + + pseudodata.sort_index(level=range(1, 3), inplace=True) pseudodata.index = sorted_index tr = pseudodata[pseudodata["type"] == "training"] @@ -177,9 +190,19 @@ def make_replica( return np.concatenate( [cd.central_values for cd in groups_dataset_inputs_loaded_cd_with_cuts] ) - # Seed the numpy RNG with the seed and the name of the datasets in this run - name_salt = "-".join(i.setname for i in groups_dataset_inputs_loaded_cd_with_cuts) + + # TODO: to be simplified after the reader is merged, together with an update of the regression tests. + # This is necessary to reproduce the previous results exactly, since the replica seed is built from a hash of the dataset names. + # It is only needed for legacy sets (or sets coming from a legacy runcard). + names_for_salt = [] + for loaded_cd in groups_dataset_inputs_loaded_cd_with_cuts: + if loaded_cd.legacy: + names_for_salt.append(loaded_cd.setname) + else: + names_for_salt.append(loaded_cd.legacy_name) + name_salt = "-".join(names_for_salt) + name_seed = int(hashlib.sha256(name_salt.encode()).hexdigest(), 16) % 10**8 rng = np.random.default_rng(seed=replica_mcseed + name_seed) # construct covmat @@ -290,7 +313,7 @@ def level0_commondata_wc(data, fakepdf): # ==== Load validphys.coredata.CommonData instance with cuts ====# for dataset in data.datasets: - commondata_wc = dataset.commondata.load_commondata_instance() + commondata_wc = dataset.commondata.load() if dataset.cuts is not None: cuts = dataset.cuts.load() commondata_wc = commondata_wc.with_cuts(cuts) @@ -381,7 +404,7 @@ def make_level1_data(data, level0_commondata_wc, filterseed, data_index, sep_mul indexed_level1_data = indexed_make_replica(data_index, level1_data) - dataset_order = {cd.setname: i for i, cd in enumerate(level0_commondata_wc)} + dataset_order = {cd.setname: i for i, cd in enumerate(level0_commondata_wc)} # ===== create commondata instances with central values given by pseudo_data =====# level1_commondata_dict = {c.setname: c for c in level0_commondata_wc} @@ -393,7 +416,7 @@ def make_level1_data(data, level0_commondata_wc, filterseed, data_index, sep_mul ) # sort back so as to maintain same order as in level0_commondata_wc level1_commondata_instances_wc.sort(key=lambda x: dataset_order[x.setname]) - + return level1_commondata_instances_wc diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index db5642afd7..2abf07779a 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -25,7 +25,7 @@ ) from validphys.convolution import PredictionsRequireCutsError, central_predictions, predictions from validphys.core import PDF, DataGroupSpec, DataSetSpec, Stats -from validphys.plotoptions import get_info +from validphys.plotoptions.core import get_info log = logging.getLogger(__name__) diff --git a/validphys2/src/validphys/scripts/vp_rebuild_data.py b/validphys2/src/validphys/scripts/vp_rebuild_data.py deleted file mode 100644 index 2c2bb7ecd4..0000000000 --- a/validphys2/src/validphys/scripts/vp_rebuild_data.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -vp_rebuild_data.py - -A script for rebuilding the filtered closure data into the format validphys/n3fit -expect.
Run this script on a closure fit before running n3fit to avoid -crashes due to multiple replicas attempting to rebuild filtered data at same time - -This script should also be used after running a closure fit with nnfit, and will -eradicate the need for the data to be rebuilt by validphys when used in analysis. - -Example -------- - -If running a closure test with n3fit, simply run this script on a filtered -closure fit output directory: - -``` -$ vp-setupfit fit_name.yaml -$ vp-rebuild-data fit_name -``` - -and the data is good to go. - -If running a closure test with nnfit DO NOT run this script until after all -replicas have finished. - -""" -# TODO: deprecate this whole scipt! - -import logging -import argparse - -from reportengine import api, colors - -from validphys.app import providers -from validphys.config import Environment -from n3fit.scripts.n3fit_exec import N3FitConfig - -log = logging.getLogger() -log.setLevel(logging.INFO) -log.addHandler(colors.ColorHandler()) - -# We want to have the Config from n3fit to accept a fit as a directory - -REBUILD_CONFIG = dict( - data_input={"from_": "fitinputcontext"}, - theory={"from_": "fit"}, - theoryid={"from_": "theory"}, - use_cuts="fromfit", - # TODO: add namespace specifications to API - closuretest={"from_": "fit"}, - fakedata={"from_": "closuretest"} -) - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - 'fit', - type=str, - help=( - "output directory of a closure fit" - ), - ) - args = parser.parse_args() - API = api.API(providers, N3FitConfig, Environment, output=args.fit) - # NOTE: this will trigger validphys.loader.rebuild_commondata_without_cuts - # which creates new files with cut data points padded with zeros, - # strictly for use with closure fits! Does crazy things! 
- API.data(**REBUILD_CONFIG) - -if __name__ == "__main__": - main() diff --git a/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png b/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png index 103835fd4d..b415ec938e 100644 Binary files a/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png and b/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png differ diff --git a/validphys2/src/validphys/tests/baseline/test_plot_obscorrs.png b/validphys2/src/validphys/tests/baseline/test_plot_obscorrs.png index 2e7ccf8658..c84f5fad80 100644 Binary files a/validphys2/src/validphys/tests/baseline/test_plot_obscorrs.png and b/validphys2/src/validphys/tests/baseline/test_plot_obscorrs.png differ diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index 61aa44947d..4ce524227f 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -60,7 +60,7 @@ def tmp(tmpdir): {'dataset': 'LHCBZMU8TEV', 'cfac': ['NRM']}, ] -POSITIVITIES = ["POSDYCBD", "POSF2S"] +POSITIVITIES = ["NNPDF_POS_5GEV_DYU", "NNPDF_POS_5GEV_F2S"] PDF = "NNPDF40_nnlo_as_01180" HESSIAN_PDF = "NNPDF40_nnlo_as_01180_hessian" diff --git a/validphys2/src/validphys/tests/regressions/dummy_closure_runcard.yaml b/validphys2/src/validphys/tests/regressions/dummy_closure_runcard.yaml deleted file mode 100644 index 0389ac91ee..0000000000 --- a/validphys2/src/validphys/tests/regressions/dummy_closure_runcard.yaml +++ /dev/null @@ -1,120 +0,0 @@ -# -# Configuration file for NNPDF++ -# - -############################################################ -description: Dummy closure fit runcard - -############################################################ -# frac: training fraction -# ewk: apply ewk k-factors -# sys: systematics treatment (see systypes) -dataset_inputs: -# Fixed target DIS -- {dataset: NMC, frac: 0.5} -# ATLAS -- {dataset: ATLASTTBARTOT, frac: 1.0, cfac: [QCD]} -# CMS -- {dataset: CMSZDIFF12, frac: 1.0, cfac: [QCD, NRM], sys: 10} - -############################################################ -datacuts: - t0pdfset: NNPDF31_nnlo_as_0118 # PDF set to generate t0 covmat - q2min: 3.49 # Q2 minimum - w2min: 12.5 # W2 minimum - combocuts: NNPDF31 # NNPDF3.0 final kin. cuts - jetptcut_tev: 0 # jet pt cut for tevatron - jetptcut_lhc: 0 # jet pt cut for lhc - wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions - jetycut_tev: 1e30 # jet rap. cut for tevatron - jetycut_lhc: 1e30 # jet rap. cut for lhc - dymasscut_min: 0 # dy inv.mass. min cut - dymasscut_max: 1e30 # dy inv.mass. max cut - jetcfactcut: 1e30 # jet cfact. 
cut - -############################################################ -theory: - theoryid: 162 # database id - -############################################################ -trvlseed: 6416578340 -nnseed: 4106399685 -mcseed: 6355854418 -genrep: true # true = generate MC replicas, false = use real data - -parameters: # This defines the parameter dictionary that is passed to the Model Trainer - nodes_per_layer: [30, 30, 42, 18, 8] - activation_per_layer: [tanh, sigmoid, sigmoid, sigmoid, linear] - initializer: glorot_uniform - optimizer: - clipnorm: 46.2 - learning_rate: 0.73 - optimizer_name: Adadelta - epochs: 50000 - positivity: - initial: 370 - multiplier: - integrability: - initial: - multiplier: 1.03 - stopping_patience: 0.30 - layer_type: dense - dropout: 0.005 - -fitting: - seed: 577993051 # set the seed for the random generator - rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) - # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) - # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) - # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) - fitbasis: EVOL # EVOL (7), EVOLQED (8), etc. - basis: - # remeber to change the name of PDF accordingly with fitbasis - # smallx, largex: preprocessing ranges - - {fl: sng, trainable: false, smallx: [1.086, 1.124], largex: [1.466, 3.33]} - - {fl: g, trainable: false, smallx: [0.7804,1.112], largex: [2.499, 6.501]} - - {fl: v, trainable: false, smallx: [0.5347,0.7183], largex: [1.503, 3.976]} - - {fl: v3, trainable: false, smallx: [0.2016,0.4753], largex: [1.781, 4.003]} - - {fl: v8, trainable: false, smallx: [0.5735,0.7563], largex: [1.531, 4.101]} - - {fl: t3, trainable: false, smallx: [-0.3324,0.8232], largex: [1.781, 4.254]} - - {fl: t8, trainable: false, smallx: [0.7985,1.018], largex: [1.515, 4.107]} - - {fl: t15, trainable: false, smallx: [1.033, 1.161], largex: [1.506, 3.504]} - -############################################################ -positivity: - posdatasets: - - {dataset: POSDYC, maxlambda: 1e6} # Positivity Lagrange Multiplier - -############################################################ -integrability: - integdatasets: - - {dataset: INTEGXT8, maxlambda: 1e2} - - {dataset: INTEGXT3, maxlambda: 1e2} - -############################################################ -closuretest: - filterseed: 9116129314 # Random seed to be used in filtering data partitions - fakedata: True # true = to use FAKEPDF to generate pseudo-data - fakepdf: NNPDF31_nnlo_as_0118 # Theory input for pseudo-data - errorsize: 1.0 # uncertainties rescaling - fakenoise: True # true = to add random fluctuations to pseudo-data - rancutprob: 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: false # 0(1) to output training(valiation) chi2 in report - printpdf4gen: false # To print info on PDFs during minimization - -############################################################ -lhagrid: - nx: 150 - xmin: 1e-9 - xmed: 0.1 - xmax: 1.0 - nq: 50 - qmax: 1e5 - -############################################################ -debug: false - -save: false -load: false diff --git a/validphys2/src/validphys/tests/test_filter_rules.py b/validphys2/src/validphys/tests/test_filter_rules.py index ad4de7ea05..05ee29827d 100644 --- a/validphys2/src/validphys/tests/test_filter_rules.py +++ b/validphys2/src/validphys/tests/test_filter_rules.py @@ -2,39 +2,48 @@ import pytest from validphys.api import API -from validphys.loader 
import FallbackLoader as Loader from validphys.filters import ( + BadPerturbativeOrder, + PerturbativeOrder, Rule, RuleProcessingError, default_filter_settings_input, - PerturbativeOrder, - BadPerturbativeOrder, ) -from validphys.tests.conftest import THEORYID, PDF +from validphys.loader import FallbackLoader as Loader +from validphys.tests.conftest import PDF +from validphys.tests.conftest import THEORYID_NEW as THEORYID bad_rules = [ - {'dataset': 'NMC'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2"}, {'rule': 'x < 0.1'}, {'dataset': 'NOT_EXISTING', 'rule': 'x < 0.1'}, - {'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': 'bogus'}, - {'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}}, - {'dataset': 'NMC', 'rule': 'x < 0.1', 'local_variables': 'bogus'}, - {'dataset': 'NMC', 'rule': 'bogus syntax'}, - {'dataset': 'NMC', 'rule': 'unknown_variable > 10'}, - {'dataset': 'NMC', 'local_variables': {'z': 'bogus syntax'}, 'rule': 'z > 10'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': 'bogus'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'local_variables': 'bogus'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'bogus syntax'}, + {'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'unknown_variable > 10'}, + { + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", + 'local_variables': {'z': 'bogus syntax'}, + 'rule': 'z > 10', + }, { - 'dataset': 'NMC', + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'local_variables': {'z': 'unknown_variable + 1'}, 'rule': 'z > 10', }, - {'dataset': 'NMC', 'local_variables': {'z': 'v+1', 'v': '10'}, 'rule': 'z > 10'}, + { + 'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", + 'local_variables': {'z': 'v+1', 'v': '10'}, + 'rule': 'z > 10', + }, ] # Note: Don't change the order here. In this way it tests all cases. 
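# (Illustrative sketch, not part of the patch: each entry in bad_rules above is expected to fail rule construction, roughly "with pytest.raises(RuleProcessingError): mkrule(bad_rules[0])", while the good_rules below must all parse cleanly.)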
good_rules = [ {'process_type': 'DIS_ALL', 'PTO': 'N3LO', 'rule': 'x < 1e-2'}, {'process_type': 'DIS_ALL', 'IC': 'False', 'rule': 'x < 1e-2'}, - {'process_type': 'JET', 'rule': 'p_T2 < 10'}, + {'process_type': 'JET', 'rule': 'pT < 3.16'}, ] @@ -54,7 +63,7 @@ def test_rule_caching(): for rule_list in (rule_list_1, rule_list_2): cut_list.append( API.cuts( - dataset_input={"dataset": "NMC"}, + dataset_input={"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "variant": "legacy"}, use_cuts="internal", theoryid=THEORYID, filter_rules=rule_list, @@ -81,18 +90,21 @@ def test_bad_rules(): def test_default_rules(): l = Loader() - dsnames = ['NMC', 'LHCBWZMU8TEV'] - for dsname in dsnames: - ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID) + dsnames = ['NMC_NC_NOTFIXED_DW_EM-F2', 'LHCB_Z0_8TEV_MUON_Y'] + variants = ["legacy", None] + for dsname, v in zip(dsnames, variants): + ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID, variant=v) assert ds.cuts.load() is not None def test_good_rules(): l = Loader() rules = [mkrule(inp) for inp in good_rules] - dsnames = ['ATLAS1JET11', 'NMC'] + dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_DW_EM-F2'] for dsname in dsnames: - ds = l.check_dataset(dsname, cuts='internal', rules=rules, theoryid=THEORYID) + ds = l.check_dataset( + dsname, cuts='internal', rules=rules, theoryid=THEORYID, variant="legacy" + ) assert ds.cuts.load() is not None @@ -101,29 +113,26 @@ def test_added_rules(): "theoryid": THEORYID, "pdf": PDF, "use_cuts": "internal", - "dataset_inputs": [{"dataset": "ATLAS1JET11"}], + "dataset_inputs": [{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "variant": "legacy"}], "filter_rules": [], "dataspecs": [ - { - "speclabel": "Original", - "added_filter_rules": None, - }, + {"speclabel": "Original", "added_filter_rules": None}, { "speclabel": "fewer data", "added_filter_rules": [ - {"dataset": "ATLAS1JET11", "rule": "p_T2 < 1000**2", "reson": "pt cut"} + {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"} ], }, { "speclabel": "empty data", "added_filter_rules": [ - {"dataset": "ATLAS1JET11", "rule": "eta < 0", "reason": "empty data"} + {"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "y < 0", "reason": "empty data"} ], }, ], } tb = API.dataspecs_chi2_table(**inp) assert tb["empty data"]["ndata"].iloc[0] == 0 - assert np.isnan(tb["empty data"].iloc[1,1]) + assert np.isnan(tb["empty data"].iloc[1, 1]) assert tb["empty data"]["ndata"].iloc[0] == 0 assert np.all(tb[1:]["fewer data"] != tb[1:]["Original"]) diff --git a/validphys2/src/validphys/tests/test_loader.py b/validphys2/src/validphys/tests/test_loader.py index c2c9c34577..b106d93e98 100644 --- a/validphys2/src/validphys/tests/test_loader.py +++ b/validphys2/src/validphys/tests/test_loader.py @@ -10,12 +10,10 @@ from hypothesis import given, settings from hypothesis.strategies import composite, sampled_from, sets -import numpy as np import pytest -from validphys.core import CommonDataSpec, Cuts -from validphys.loader import FallbackLoader, FitNotFound, rebuild_commondata_without_cuts, NNPDF_DIR -from validphys.plotoptions import get_info, kitable +from validphys.loader import NNPDF_DIR, FallbackLoader, FitNotFound +from validphys.plotoptions.core import get_info, kitable from validphys.tests.conftest import FIT, FIT_3REPLICAS, THEORYID_NEW l = FallbackLoader() @@ -33,7 +31,8 @@ def load(self): @composite def commondata_and_cuts(draw): - cd = l.check_commondata(draw(sampled_from(dss))) + old_name = draw(sampled_from(dss)) + cd = l.check_commondata(old_name,
force_old_format=True) ndata = cd.metadata.ndata # Get a cut mask with at least one selected datapoint masks = sets(sampled_from(range(ndata)), min_size=1) @@ -41,39 +40,6 @@ def commondata_and_cuts(draw): return cd, mask -@given(arg=commondata_and_cuts()) -@settings(deadline=None) -def test_rebuild_commondata_without_cuts(tmp_path_factory, arg): - # We need to create a new directory for each call of the test - # otherwise we get files mixed together - tmp = tmp_path_factory.mktemp("test_loader") - - cd, cuts = arg - lcd = cd.load() - cutspec = None - if cuts: - cutpath = tmp / "cuts.txt" - np.savetxt(cutpath, np.asarray(cuts, dtype=int), fmt="%u") - cutspec = Cuts(cd, cutpath) - lcd = lcd.with_cuts(cuts) - lcd.export(tmp) - # We have to reconstruct the name here... - with_cuts = tmp / f"DATA_{cd.name}.dat" - newpath = tmp / "commondata.dat" - rebuild_commondata_without_cuts(with_cuts, cutspec, cd.datafile, newpath) - newcd = CommonDataSpec(newpath, cd.sysfile, cd.plotfiles) - # Note this one is without cuts - t1 = kitable(cd, get_info(cd)) - t2 = kitable(newcd, get_info(newcd)) - assert (t1 == t2).all - lncd = newcd.load() - if cuts: - assert np.allclose(lncd.get_cv()[cuts], lcd.get_cv()) - nocuts = np.ones(cd.ndata, dtype=bool) - nocuts[cuts] = False - assert (lncd.get_cv()[nocuts] == 0).all() - - @given(inp=commondata_and_cuts()) @settings(deadline=None) def test_kitable_with_cuts(inp): diff --git a/validphys2/src/validphys/tests/test_plots.py b/validphys2/src/validphys/tests/test_plots.py index 1e03f5b06a..53d6166087 100644 --- a/validphys2/src/validphys/tests/test_plots.py +++ b/validphys2/src/validphys/tests/test_plots.py @@ -6,9 +6,9 @@ import pytest from validphys.api import API -from validphys.tests.conftest import PDF, THEORYID, DATA +from validphys.tests.conftest import DATA, PDF, THEORYID -TOLERANCE_VALUE = 12 +TOLERANCE_VALUE = 18 @pytest.mark.linux diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index b6149006ff..74f2dd87d0 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -7,15 +7,15 @@ This is used to benchmark the correctness of the pseudodata recreation. """ -import pandas as pd -import numpy as np from numpy.testing import assert_allclose +import pandas as pd import pytest from validphys.api import API -from validphys.tests.conftest import FIT, PSEUDODATA_FIT, THEORYID, SINGLE_DATASET, PDF -from validphys.loader import Loader from validphys.covmats import dataset_t0_predictions +from validphys.loader import Loader +from validphys.tests.conftest import FIT, PDF, PSEUDODATA_FIT, SINGLE_DATASET, THEORYID + def test_read_fit_pseudodata(): fit_pseudodata = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) @@ -78,9 +78,7 @@ def test_read_matches_recreate(): for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. 
- pd.testing.assert_frame_equal( - read.pseudodata, recreate.pseudodata, check_like=True - ) + pd.testing.assert_frame_equal(read.pseudodata, recreate.pseudodata, check_like=True) pd.testing.assert_index_equal(read.tr_idx, recreate.tr_idx, check_order=False) pd.testing.assert_index_equal(read.val_idx, recreate.val_idx, check_order=False) @@ -95,17 +93,11 @@ def test_level0_commondata_wc(): l = Loader() datasetspec = l.check_dataset(list(dataset.values())[0], theoryid=THEORYID) t0set = l.check_pdf(pdfname) - + l0_cd = API.level0_commondata_wc( - dataset_inputs=[dataset], - use_cuts="internal", - theoryid=THEORYID, - fakepdf=pdfname, + dataset_inputs=[dataset], use_cuts="internal", theoryid=THEORYID, fakepdf=pdfname ) l0_vals = l0_cd[0].central_values assert_allclose( - dataset_t0_predictions(dataset=datasetspec, t0set=t0set), - l0_vals, - rtol=1e-07, - atol=0, + dataset_t0_predictions(dataset=datasetspec, t0set=t0set), l0_vals, rtol=1e-07, atol=0 ) diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index ee9d4633bf..f1bb958b72 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -1,14 +1,14 @@ -import pytest -import pandas as pd import numpy as np from numpy.testing import assert_allclose +import pandas as pd +import pytest from validphys.api import API -from validphys.loader import Loader -from validphys.results import ThPredictionsResult, PositivityResult +from validphys.convolution import central_predictions, linear_predictions, predictions from validphys.fkparser import load_fktable -from validphys.convolution import predictions, central_predictions, linear_predictions -from validphys.tests.conftest import PDF, HESSIAN_PDF, THEORYID, POSITIVITIES +from validphys.loader import FallbackLoader as Loader +from validphys.results import PositivityResult, ThPredictionsResult +from validphys.tests.conftest import HESSIAN_PDF, PDF, POSITIVITIES, THEORYID, THEORYID_NEW def test_basic_loading(): @@ -76,8 +76,12 @@ def test_predictions(pdf_name): # rtol to 1e-2 due to DYE906R and DYE906_D for MC sets # TODO: check whether this tolerance can be decreased when using double precision assert_allclose(cv_predictions, stats_predictions.central_value(), rtol=1e-2) - assert_allclose(core_predictions.error_members, stats_predictions.error_members().T, rtol=1e-3) - assert_allclose(core_predictions.central_value, stats_predictions.central_value(), rtol=1e-2) + assert_allclose( + core_predictions.error_members, stats_predictions.error_members().T, rtol=1e-3 + ) + assert_allclose( + core_predictions.central_value, stats_predictions.central_value(), rtol=1e-2 + ) @pytest.mark.parametrize("pdf_name", [PDF, HESSIAN_PDF]) @@ -148,7 +152,7 @@ def test_compare_cf(data_internal_cuts_config, data_internal_cuts_new_theory_con res_old_cfac = central_predictions(ds_old_cfac, pdf) res_new_cfac = central_predictions(ds_new_cfac, pdf) - old_cfac = res_old_cfac/res_old - new_cfac = res_new_cfac/res_new + old_cfac = res_old_cfac / res_old + new_cfac = res_new_cfac / res_new np.testing.assert_allclose(new_cfac, old_cfac, rtol=1e-4) diff --git a/validphys2/src/validphys/tests/test_rebuilddata.py b/validphys2/src/validphys/tests/test_rebuilddata.py deleted file mode 100644 index f6b00cdd77..0000000000 --- a/validphys2/src/validphys/tests/test_rebuilddata.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -test_rebuilddata.py - -filter some simple closure data and then check that rebuild data runs on it -and produces 
expected results - -""" -import pathlib -import pytest -import shutil -import subprocess as sp - -from reportengine import api - -from validphys.app import providers -from validphys.config import Environment -from validphys.scripts.vp_rebuild_data import REBUILD_CONFIG -from validphys.tableloader import sane_load -from validphys.tests.test_regressions import make_table_comp - -from n3fit.scripts.n3fit_exec import N3FitConfig - -FIT_NAME = "dummy_closure_runcard" - -REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions") - - -def parse_test_output(filename): - """Parse the output of groups_data_values.""" - df = sane_load(filename, header=0, index_col=[0, 1, 2]) - return df - - -@pytest.mark.linux -@make_table_comp(parse_test_output) -def test_filter_rebuild_closure_data(tmp): - """ - Takes a closure test runcard from the regressions directory - and then runs ``vp-setupfit`` in a temp directory and then - ``vp-rebuild-data`` on the resulting fit folder. - - The test then loads the filtered and rebuilt data and checks that the - experimental central values (which generated during ``vp-setupfit``) - take on the expected values. - - """ - runcard_name = FIT_NAME + ".yaml" - runcard = REGRESSION_FOLDER / runcard_name - - # cp runcard to tmp folder - shutil.copy(runcard, tmp) - # filter the runcard - sp.run(f"vp-setupfit {runcard_name}".split(), cwd=tmp, check=True) - - sp.run(f"vp-rebuild-data {FIT_NAME}".split(), cwd=tmp, check=True) - - API = api.API( - providers, N3FitConfig, Environment, output=str(tmp / FIT_NAME) - ) - # to use groups_data_values we need to do some gymnastics with data spec - # because taking data_input from fitinputcontext overwrites any subsequent - # grouping. - input_params = dict(REBUILD_CONFIG) - input_params.pop("data_input") - input_params["dataset_inputs"] = {"from_": "fit"} - df = API.groups_data_values(**input_params, pdf="NNPDF31_nnlo_as_0118") - return df.to_frame() diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index 35989b2eaf..130345f18b 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -15,6 +15,7 @@ from reportengine.table import savetable from validphys import results from validphys.api import API +from validphys.datafiles import legacy_to_new_map from validphys.tableloader import ( load_fits_chi2_table, load_perreplica_chi2_table, @@ -29,7 +30,6 @@ REGRESSION_FOLDER = pathlib.Path(__file__).with_name('regressions') -# TODO: Move these to a library def compare_tables(produced_table, storage_path, loader_func, tolerance=1e-8): """Test that the ``produced_table`` is equal (as in allclose) to the one loaded from the `storage_path` using the `loader_func`""" @@ -38,6 +38,26 @@ def compare_tables(produced_table, storage_path, loader_func, tolerance=1e-8): # Fail test assert False, "Storage path does not exist" stored_table = loader_func(storage_path) + + # TODO: transitional comparison of names for old-new comparison of dataframes + mapping = {} + try: + used_datasets = produced_table.index.get_level_values("dataset").unique() + for dsname in stored_table.index.get_level_values("dataset").unique(): + if dsname not in used_datasets: + new_name, _ = legacy_to_new_map(dsname) + mapping[dsname] = new_name + + # where in the index is it? (usually = 1) and which axes? 
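+ # (i.e. locate the position of the "dataset" level once and apply the rename along every axis that carries it, so both row and column indices are remapped)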
+ for n, axis in enumerate(stored_table.axes): + if "dataset" not in axis.names: + continue + idx = stored_table.index.names.index("dataset") + stored_table.rename(mapping, inplace=True, level=idx, axis=n) + except KeyError: + # Maybe there are no datasets here + pass + ########### assert_frame_equal(produced_table, stored_table, atol=tolerance) diff --git a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py index 33d0f230a9..33fbe12f4d 100644 --- a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py +++ b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py @@ -7,7 +7,7 @@ from reportengine.checks import check, make_argcheck from validphys.loader import Loader -from validphys.plotoptions import get_info +from validphys.plotoptions.core import get_info log = logging.getLogger(__name__) @@ -18,7 +18,10 @@ def check_correct_theory_combination_internal( """Checks that a valid theory combination corresponding to an existing prescription has been inputted""" l = len(theoryids) - check(l in {3, 5, 7, 9, 62, 64, 66, 70, 19, 23}, f"Expecting exactly 3, 5, 7, 9, 62, 64, 66, 23, 19 or 70 theories, but got {l}.") + check( + l in {3, 5, 7, 9, 62, 64, 66, 70, 19, 23}, + f"Expecting exactly 3, 5, 7, 9, 62, 64, 66, 23, 19 or 70 theories, but got {l}.", + ) opts = {"bar", "nobar"} xifs = [theoryid.get_description()["XIF"] for theoryid in theoryids] xirs = [theoryid.get_description()["XIR"] for theoryid in theoryids] diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py index 3471cfb954..af259130e0 100644 --- a/validphys2/src/validphys/utils.py +++ b/validphys2/src/validphys/utils.py @@ -1,19 +1,15 @@ -# -*- coding: utf-8 -*- -""" -Created on Sun Mar 13 21:12:41 2016 - -@author: Zahari Kassabov -""" import contextlib import functools import pathlib import shutil import tempfile -from typing import Any, Sequence, Mapping, Hashable +from typing import Any, Hashable, Mapping, Sequence +from frozendict import frozendict import numpy as np from validobj import ValidationError, parse_input -from frozendict import frozendict + +from reportengine.compat import yaml def make_hashable(obj: Any): @@ -34,20 +30,33 @@ def freeze_args(func): Into immutable Useful to be compatible with cache """ + @functools.wraps(func) def wrapped(*args, **kwargs): args = tuple([make_hashable(arg) for arg in args]) kwargs = {k: make_hashable(v) for k, v in kwargs.items()} return func(*args, **kwargs) + return wrapped -def parse_yaml_inp(inp, spec, path): - """Helper function to parse yaml using the `validobj` library and print +def generate_path_filtered_data(fit_path, setname): + """Utility to ensure that both the loader and tools like setupfit utilize the same convention + to generate the names of generated pseudodata""" + data_path = fit_path / "filter" / setname / f"filtered_data_{setname}.yaml" + unc_path = data_path.with_name(f"filtered_uncertainties_{setname}.yaml") + return data_path, unc_path + + +def parse_yaml_inp(input_yaml, spec): + """ + Helper function to parse yaml using the `validobj` library and print useful error messages in case of a parsing error. 
https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers """ + input_yaml = pathlib.Path(input_yaml) + inp = yaml.round_trip_load(input_yaml.open("r", encoding="utf-8")) try: return parse_input(inp, spec) except ValidationError as e: @@ -62,7 +71,7 @@ def parse_yaml_inp(inp, spec, path): # ``(line_number, column)`` for a given item in # the mapping. line = current_inp.lc.item(wrong_field)[0] - error_text_lines.append(f"Problem processing key at line {line} in {path}:") + error_text_lines.append(f"Problem processing key at line {line} in {input_yaml}:") current_inp = current_inp[wrong_field] elif hasattr(current_exc, 'wrong_index'): wrong_index = current_exc.wrong_index @@ -70,7 +79,9 @@ def parse_yaml_inp(inp, spec, path): # a given item. line = current_inp.lc.item(wrong_index)[0] current_inp = current_inp[wrong_index] - error_text_lines.append(f"Problem processing list item at line {line} in {path}:") + error_text_lines.append( + f"Problem processing list item at line {line} in {input_yaml}:" + ) elif hasattr(current_exc, 'unknown'): unknown_lines = [] for u in current_exc.unknown: @@ -78,7 +89,7 @@ def parse_yaml_inp(inp, spec, path): unknown_lines.sort() for line, key in unknown_lines: error_text_lines.append( - f"Unknown key {key!r} defined at line {line} in {path}:" + f"Unknown key {key!r} defined at line {line} in {input_yaml}:" ) error_text_lines.append(str(current_exc)) current_exc = current_exc.__cause__ @@ -136,7 +147,6 @@ def tempfile_cleaner(root, exit_func, exc, prefix=None, **kwargs): prefix="tutorial_", dst="completed", ) as tempdir: - new_file = tempdir / "new_file" input("Press enter to continue or Ctrl-C to interrupt:\\n") new_file.touch()
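# (Illustrative sketch, not part of the patch: with the new signature above, callers pass
# the YAML path directly and parse_yaml_inp takes care of reading it, e.g. assuming a
# hypothetical validobj spec class Metadata:
#
#     from validphys.utils import parse_yaml_inp
#     metadata = parse_yaml_inp("observable.yaml", Metadata)
#
# so validation errors can be reported with line numbers relative to that file.)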