diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..486a232 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.zip filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index be91fdb..24b0b62 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,5 @@ servicex.yaml *.pstats servicex/test.py *.html +.DS_Store +figures/ diff --git a/README.md b/README.md index 951c1b1..e39dba9 100644 --- a/README.md +++ b/README.md @@ -36,13 +36,15 @@ with additional files: * `input_files/container_list.txt`: list of containers to run over * `input_files/produce_container_metadata.py`: query metadata for containers: number of files / events, size * `input_files/container_metadata.json`: output of `input_files/produce_container_metadata.py` with container metadata -* `input_files/get_file_list.py`: for a given dataset creates a txt file listing file access paths that include apropriate xcache. The same kind of output can be obtained by doing: +* `input_files/get_file_list.py`: for a given dataset creates a txt file listing file access paths that include appropriate xcache. The same kind of output can be obtained by doing: ``` export SITE_NAME=AF_200 rucio list-file-replicas mc20_13TeV:mc20_13TeV.364126.Sherpa_221_NNPDF30NNLO_Zee_MAXHTPTV500_1000.deriv.DAOD_PHYSLITE.e5299_s3681_r13145_p6026 --protocol root --pfns --rses MWT2_UC_LOCALGROUPDISK ``` +* `input_files/containers_to_files.py`: process the list of containers into a list of files per container with hardcoded xcache instances; the per-container lists are written to `input_files/file_lists/`, which is then packed into `input_files/file_lists.zip` (the unzipped folder is deleted afterwards). + ### Branch list determination Branches to be read are determined with a 2018 data file. diff --git a/input_files/containers_to_files.py b/input_files/containers_to_files.py new file mode 100644 index 0000000..b46af86 --- /dev/null +++ b/input_files/containers_to_files.py @@ -0,0 +1,30 @@ +# process list of containers into list of files with hardcoded xcache instances + +# to run get_file_list.py, use e.g. 
a venv on uchicago via ssh +# python3 -m venv venv +# source venv/bin/activate +# pip install xmltodict +# (assuming setupATLAS / lsetup rucio + proxy present) + +import os +import shutil + +if __name__ == "__main__": + with open("container_list.txt") as f: + containers = f.readlines() + + for container in containers: + container = container.strip() + + if "#" in container: + continue # skip comments + + cmd = f"python get_file_list.py {container}" + print(cmd) + os.system(cmd) # produce file list + + # create zipped version of folder with file lists + shutil.make_archive("file_lists", "zip", "file_lists") + + # cleanup: delete non-zipped version + shutil.rmtree("file_lists") diff --git a/input_files/file_lists.zip b/input_files/file_lists.zip new file mode 100644 index 0000000..017e112 --- /dev/null +++ b/input_files/file_lists.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2515a94ccf91973c779c4bcdfd1f857d1314643d320e7dab33a2d3c4b6735b4 +size 2243080 diff --git a/input_files/get_file_list.py b/input_files/get_file_list.py index eb88cfe..19fe463 100644 --- a/input_files/get_file_list.py +++ b/input_files/get_file_list.py @@ -13,6 +13,7 @@ import logging import xmltodict import hashlib +import os from rucio.common.exception import DataIdentifierNotFound from rucio.client.scopeclient import ScopeClient @@ -140,12 +141,16 @@ def hash_string(input_string): return int_value +output_directory = "file_lists" +if not os.path.exists(output_directory): + os.mkdir(output_directory) + cf = [] for f in files: c = hash_string(f) % len(caches) cf.append(f'root://{caches[c]}//{f}') print(f) -with open(f'{did}.txt', 'w') as file: +with open(f'{output_directory}/{did.replace(":", "-")}.txt', 'w') as file: for f in cf: file.write(f + '\n') diff --git a/input_files/utils.py b/input_files/utils.py new file mode 100644 index 0000000..4fab071 --- /dev/null +++ b/input_files/utils.py @@ -0,0 +1,69 @@ +from collections import defaultdict +import json +from 
pathlib import Path +import zipfile + +from . import find_containers + +DIR = Path(__file__).parent.resolve() + + +def get_dsids(process): + if "data" not in process: + return find_containers.container_dict[process] + else: + return [process] + + +def get_fileset(processes_to_use, max_files_per_container=None): + with open(DIR / "container_metadata.json") as f: + container_metadata = json.load(f) # name -> metadata + + container_to_file_list = {} # container name -> list of files + + # read from zipped file + with zipfile.ZipFile(DIR / "file_lists.zip") as z: + for filename in sorted(z.namelist()): + container_name = filename.split("/")[-1:][0].replace("-", ":").replace(".txt", "") + with z.open(filename) as f: + file_list = f.readlines() + + # limit amount of files per container + if max_files_per_container is not None: + file_list = file_list[:max_files_per_container] + + container_to_file_list[container_name] = [p.decode("utf-8").strip() for p in file_list] + + fileset = defaultdict(lambda: defaultdict(dict)) # process -> list of files + total_nfiles = 0 + total_size_TB = 0 + total_nevts = 0 + for process in processes_to_use: + dsids = get_dsids(process) + for dsid in dsids: + # find matching containers + matching_containers = [c for c in list(container_to_file_list.keys()) if str(dsid) in c] + # for each container, add full list of files + for container in matching_containers: + file_list = container_to_file_list[container] + total_nfiles += len(file_list) + if max_files_per_container is None: + assert len(file_list) == container_metadata[container]["nfiles"] + total_size_TB += container_metadata[container]["size_TB"] + total_nevts += container_metadata[container]["nevts"] + fileset[process]["files"].update(dict(zip(file_list, ["CollectionTree"]*len(file_list)))) + + print("fileset summary") + print(f" - number of files: {total_nfiles:,}") + if max_files_per_container is None: + print(f" - total size: {total_size_TB:.3f} TB") + print(f" - number of nevts: 
{total_nevts:,}") + else: + print("cannot determine total size / number of events when max_files_per_container is being used") + + return fileset + + +if __name__ == "__main__": + processes = ["db", "zjets", "wjets", "ttV", "othertop", "ttbar", "data15_13TeV", "data16_13TeV", "data17_13TeV", "data18_13TeV"] + get_fileset(processes) diff --git a/materialize_branches.ipynb b/materialize_branches.ipynb index 6eccbe6..69524ff 100644 --- a/materialize_branches.ipynb +++ b/materialize_branches.ipynb @@ -21,7 +21,10 @@ } ], "source": [ + "import glob\n", + "import json\n", "import os\n", + "from collections import defaultdict\n", "from pathlib import Path\n", "\n", "import awkward as ak\n", @@ -44,14 +47,19 @@ "\n", "warnings.filterwarnings(\"ignore\")\n", "\n", + "from input_files import utils\n", + "\n", "from dask.distributed import LocalCluster, Client, progress, performance_report\n", "\n", "# local: single thread, single worker\n", - "cluster = LocalCluster(n_workers=1, processes=False, threads_per_worker=1)\n", - "client = Client(cluster)\n", + "# cluster = LocalCluster(n_workers=1, processes=False, threads_per_worker=1)\n", + "# client = Client(cluster)\n", "\n", "# for UChicago\n", - "# client = Client(\"tcp://dask-alheld-a76c9434-b.af-jupyter:8786\")\n", + "client = Client(\"tcp://dask-alheld-f730f827-a.af-jupyter:8786\") # update this to point to your own client!\n", + "\n", + "figures_dir = Path.cwd() / \"figures\"\n", + "figures_dir.mkdir(exist_ok=True)\n", "\n", "print(f\"awkward: {ak.__version__}\")\n", "print(f\"dask-awkward: {dak.__version__}\")\n", @@ -75,9 +83,9 @@ "metadata": {}, "outputs": [], "source": [ - "fname = \"/data/alheld/200gbps-atlas/data18_13TeV.periodAllYear.physics_Main.PhysCont.DAOD_PHYSLITE.grp18_v01_p6026/DAOD_PHYSLITE.37021624._000036.pool.root.1\"\n", - "treename = \"CollectionTree\"\n", - "events = NanoEventsFactory.from_root({fname: treename}, schemaclass=PHYSLITESchema).events()" + "# fname = 
\"/data/alheld/200gbps-atlas/data18_13TeV.periodAllYear.physics_Main.PhysCont.DAOD_PHYSLITE.grp18_v01_p6026/DAOD_PHYSLITE.37021624._000036.pool.root.1\"\n", + "# treename = \"CollectionTree\"\n", + "# events = NanoEventsFactory.from_root({fname: treename}, schemaclass=PHYSLITESchema).events()" ] }, { @@ -88,9 +96,75 @@ "### distributed coffea" ] }, + { + "cell_type": "markdown", + "id": "f7f24e12-3fd8-481c-8318-7f9277f72c50", + "metadata": {}, + "source": [ + "build a `fileset` using the list of files corresponding to some selected containers" + ] + }, { "cell_type": "code", "execution_count": 3, + "id": "0ef7dd8d-68e5-43fa-b8bf-5d8f972520c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fileset summary\n", + " - number of files: 3\n", + "cannot determine total size / number of events when max_files_per_container is being used\n" + ] + } + ], + "source": [ + "# -------------\n", + "# CONFIGURATION\n", + "# -------------\n", + "# modify this to change how many files are being processed\n", + "# full list is list(find_containers.container_dict.keys()) + [\"data15_13TeV\", \"data16_13TeV\", \"data17_13TeV\", \"data18_13TeV\"]\n", + "\n", + "PROCESSES_TO_USE = [\"ttbar\"] # 6.7 TB\n", + "# PROCESSES_TO_USE = [\"db\", \"zjets\", \"wjets\", \"ttV\", \"othertop\", \"ttbar\"] # all simulation, 48.4 TB\n", + "# PROCESSES_TO_USE = [\"db\", \"zjets\", \"wjets\", \"ttV\", \"othertop\", \"ttbar\", \"data15_13TeV\", \"data16_13TeV\", \"data17_13TeV\", \"data18_13TeV\"] # 191 TB\n", + "\n", + "fileset = utils.get_fileset(PROCESSES_TO_USE, max_files_per_container=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ff312282-a87b-4394-aff4-6acc12d27c67", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "files at MWT2: 3, elsewhere: 0\n" + ] + } + ], + "source": [ + "# check for files not yet replicated to MWT2\n", + "files_at_mwt2 = 0\n", + "files_elsewhere = 
0\n", + "for process in fileset.keys():\n", + " for file in fileset[process][\"files\"]:\n", + " if \"mwt2\" in file:\n", + " files_at_mwt2 += 1\n", + " else:\n", + " files_elsewhere += 1\n", + "\n", + "print(f\"files at MWT2: {files_at_mwt2}, elsewhere: {files_elsewhere}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "0a38efe4-8024-422c-ba42-12bd3a3b44cd", "metadata": { "tags": [] @@ -190,41 +264,9 @@ " return {\"nevts\": num_events, \"_counter\": _counter}" ] }, - { - "cell_type": "markdown", - "id": "f7f24e12-3fd8-481c-8318-7f9277f72c50", - "metadata": {}, - "source": [ - "just run over a local data file here as an example" - ] - }, { "cell_type": "code", - "execution_count": 4, - "id": "684f1240-a754-4dd1-b861-1bfac65ec288", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'data': {'files': {'/data/alheld/200gbps-atlas/data18_13TeV.periodAllYear.physics_Main.PhysCont.DAOD_PHYSLITE.grp18_v01_p6026/DAOD_PHYSLITE.37021624._000036.pool.root.1': 'CollectionTree'}}}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fileset = {\"data\": {\"files\": {fname: treename}}}\n", - "fileset" - ] - }, - { - "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "ed1ecc41-0a72-44ec-a8b8-654286a02196", "metadata": { "tags": [] @@ -234,20 +276,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.17 s, sys: 25.1 ms, total: 1.2 s\n", - "Wall time: 1.18 s\n" + "CPU times: user 62.8 ms, sys: 14.3 ms, total: 77.1 ms\n", + "Wall time: 2.36 s\n" ] } ], "source": [ "%%time\n", "# pre-process\n", - "samples, _ = dataset_tools.preprocess(fileset, step_size=1_000_000)" + "samples, report = dataset_tools.preprocess(fileset, skip_bad_files=True, uproot_options={\"allow_read_errors_with_report\": True})" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, + "id": "a7cf89ae-569f-4191-963d-d0e122eaf7ea", 
+ "metadata": {}, + "outputs": [], + "source": [ + "# find issues where access did not work\n", + "for process in report:\n", + " for k, v in report[process][\"files\"].items():\n", + " if v[\"steps\"] is None:\n", + " print(f\"could not read {k}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "dca7b2ea-ae18-41ac-a4a8-cf257621dd10", "metadata": {}, "outputs": [], @@ -342,7 +398,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "15835a57-1182-4efb-8306-07f36af7a5b2", "metadata": { "tags": [] @@ -352,8 +408,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.15 s, sys: 34.3 ms, total: 2.18 s\n", - "Wall time: 2.16 s\n" + "CPU times: user 3.96 s, sys: 111 ms, total: 4.07 s\n", + "Wall time: 4.09 s\n" ] } ], @@ -377,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "6b6dfb17-6806-46c8-aa59-cd475e40cf64", "metadata": { "tags": [] @@ -387,10 +443,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "total time spent in uproot reading data: 65.92 s\n", - "wall time: 69.24s\n", - "CPU times: user 30.3 s, sys: 40.3 s, total: 1min 10s\n", - "Wall time: 1min 9s\n" + "total time spent in uproot reading data: 40.30 s\n", + "wall time: 20.30s\n", + "CPU times: user 550 ms, sys: 73.8 ms, total: 624 ms\n", + "Wall time: 20.3 s\n" ] } ], @@ -408,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "bb47e9e5-57ea-4385-b8f8-b2a22ded030e", "metadata": {}, "outputs": [ @@ -416,11 +472,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "output: {'data': {'nevts': 127482, '_counter': 97995532}}\n", + "output: {'ttbar': {'nevts': 80000, '_counter': 53556499}}\n", "\n", "performance metrics:\n", - " - event rate: 1.84 kHz\n", - " - read 409.56 MB in 69.24 s -> 5.92 MBps (need to scale by x4226 to reach 200 Gbps)\n" + " - event rate: 3.94 kHz\n", + " - read 261.50 MB in 20.30 s -> 12.88 MBps (need to scale by 
x1941 to reach 200 Gbps)\n" ] } ], @@ -429,25 +485,26 @@ "\n", "print(\"\\nperformance metrics:\")\n", "\n", - "event_rate = out[\"data\"][\"nevts\"] / (t1-t0)\n", + "event_rate = sum([out[process][\"nevts\"] for process in out.keys()]) / (t1-t0)\n", "print(f\" - event rate: {event_rate / 1_000:.2f} kHz\")\n", "\n", "# need uproot>=5.3.2 to get these useful performance stats\n", - "read_MB = sum(report['data']['performance_counters']['num_requested_bytes']) / 1_000**2\n", + "num_bytes = ak.sum([report[process][\"performance_counters\"][\"num_requested_bytes\"] for process in out.keys()])\n", + "read_MB = num_bytes / 1_000**2\n", "rate_Mbs = read_MB / (t1-t0)\n", "print(f\" - read {read_MB:.2f} MB in {t1-t0:.2f} s -> {rate_Mbs:.2f} MBps (need to scale by x{200/8/rate_Mbs*1000:.0f} to reach 200 Gbps)\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "51fb1ab6-0be4-4b99-942a-c1b53eda5639", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'from-uproot-a425e4500d963f22bf8285371d571b32': frozenset({'AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult',\n", + "{'from-uproot-08c819d5b3e2667624a7b22c8e1cbdd5': frozenset({'AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult',\n", " 'AnalysisElectronsAuxDyn.ambiguityLink',\n", " 'AnalysisElectronsAuxDyn.eta',\n", " 'AnalysisElectronsAuxDyn.f1',\n", @@ -531,7 +588,7 @@ " 'PrimaryVerticesAuxDyn.z'})}" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -546,12 +603,14 @@ "id": "5e95cec3-d919-4973-9989-e343f6745199", "metadata": {}, "source": [ - "## sanity check: read those columns without any Dask / coffea, compare footprint" + "## sanity check: read those columns without any Dask / coffea, compare footprint\n", + "\n", + "this does not scale and uses a single hardcoded file" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "49190b2a-0953-48b1-b56a-83b6e8298cfd", "metadata": {}, 
"outputs": [ @@ -566,7 +625,9 @@ "source": [ "branches_touched = list(list(dak.report_necessary_columns(tasks).values())[0])\n", "\n", - "t = uproot.open({fname: \"CollectionTree\"})\n", + "fname = \"/data/alheld/200gbps-atlas/data18_13TeV.periodAllYear.physics_Main.PhysCont.DAOD_PHYSLITE.grp18_v01_p6026/DAOD_PHYSLITE.37021624._000036.pool.root.1\"\n", + "treename = \"CollectionTree\"\n", + "t = uproot.open({fname: treename})\n", "\n", "initial_size_in_MB = t.file.source.num_requested_bytes/1000**2 # non-zero at the start (some metadata read)\n", "\n", @@ -594,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "5acaa518-16e2-444b-b086-d778fb096163", "metadata": {}, "outputs": [ @@ -602,88 +663,88 @@ "name": "stdout", "output_type": "stream", "text": [ - "AnalysisJetsAuxDyn.pt : 249 ms\t\t 5.03 MB\t\t 20.2 MB/s\n", - "AnalysisJetsAuxDyn.eta : 251 ms\t\t 5.25 MB\t\t 20.9 MB/s\n", - "AnalysisJetsAuxDyn.phi : 248 ms\t\t 5.24 MB\t\t 21.1 MB/s\n", - "AnalysisJetsAuxDyn.m : 248 ms\t\t 5.05 MB\t\t 20.4 MB/s\n", - "AnalysisElectronsAuxDyn.pt : 367 ms\t\t 0.67 MB\t\t 1.8 MB/s\n", - "AnalysisElectronsAuxDyn.eta : 371 ms\t\t 0.68 MB\t\t 1.8 MB/s\n", - "AnalysisElectronsAuxDyn.phi : 372 ms\t\t 0.68 MB\t\t 1.8 MB/s\n", - "AnalysisElectronsAuxDyn.m : 373 ms\t\t 0.43 MB\t\t 1.2 MB/s\n", - "AnalysisMuonsAuxDyn.pt : 377 ms\t\t 0.74 MB\t\t 2.0 MB/s\n", - "AnalysisMuonsAuxDyn.eta : 377 ms\t\t 0.75 MB\t\t 2.0 MB/s\n", - "AnalysisMuonsAuxDyn.phi : 374 ms\t\t 0.75 MB\t\t 2.0 MB/s\n", - "AnalysisJetsAuxDyn.EnergyPerSampling : 1982 ms\t\t38.83 MB\t\t 19.6 MB/s\n", - "AnalysisJetsAuxDyn.SumPtTrkPt500 : 2107 ms\t\t30.52 MB\t\t 14.5 MB/s\n", - "AnalysisJetsAuxDyn.TrackWidthPt1000 : 1999 ms\t\t17.00 MB\t\t 8.5 MB/s\n", - "PrimaryVerticesAuxDyn.z : 324 ms\t\t13.11 MB\t\t 40.4 MB/s\n", - "PrimaryVerticesAuxDyn.x : 322 ms\t\t10.82 MB\t\t 33.6 MB/s\n", - "PrimaryVerticesAuxDyn.y : 323 ms\t\t10.53 MB\t\t 32.6 MB/s\n", - "AnalysisJetsAuxDyn.NumTrkPt500 : 
2279 ms\t\t10.36 MB\t\t 4.5 MB/s\n", - "AnalysisJetsAuxDyn.NumTrkPt1000 : 1987 ms\t\t 7.63 MB\t\t 3.8 MB/s\n", - "AnalysisJetsAuxDyn.SumPtChargedPFOPt500 : 1945 ms\t\t 5.96 MB\t\t 3.1 MB/s\n", - "AnalysisJetsAuxDyn.Timing : 293 ms\t\t 5.31 MB\t\t 18.1 MB/s\n", - "AnalysisJetsAuxDyn.JetConstitScaleMomentum_eta : 292 ms\t\t 5.26 MB\t\t 18.0 MB/s\n", - "AnalysisJetsAuxDyn.ActiveArea4vec_eta : 293 ms\t\t 5.25 MB\t\t 18.0 MB/s\n", - "AnalysisJetsAuxDyn.DetectorEta : 293 ms\t\t 5.25 MB\t\t 17.9 MB/s\n", - "AnalysisJetsAuxDyn.JetConstitScaleMomentum_phi : 295 ms\t\t 5.25 MB\t\t 17.8 MB/s\n", - "AnalysisJetsAuxDyn.ActiveArea4vec_phi : 598 ms\t\t 5.24 MB\t\t 8.8 MB/s\n", - "AnalysisJetsAuxDyn.JetConstitScaleMomentum_m : 292 ms\t\t 5.13 MB\t\t 17.6 MB/s\n", - "AnalysisJetsAuxDyn.JetConstitScaleMomentum_pt : 292 ms\t\t 5.07 MB\t\t 17.4 MB/s\n", - "AnalysisJetsAuxDyn.EMFrac : 292 ms\t\t 5.04 MB\t\t 17.3 MB/s\n", - "AnalysisJetsAuxDyn.Width : 291 ms\t\t 5.01 MB\t\t 17.2 MB/s\n", - "AnalysisJetsAuxDyn.ActiveArea4vec_m : 292 ms\t\t 4.90 MB\t\t 16.8 MB/s\n", - "AnalysisJetsAuxDyn.ActiveArea4vec_pt : 293 ms\t\t 4.85 MB\t\t 16.5 MB/s\n", - "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksWidth : 295 ms\t\t 3.78 MB\t\t 12.8 MB/s\n", - "AnalysisJetsAuxDyn.PSFrac : 294 ms\t\t 3.67 MB\t\t 12.5 MB/s\n", - "AnalysisJetsAuxDyn.JVFCorr : 297 ms\t\t 3.49 MB\t\t 11.7 MB/s\n", - "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksC1 : 292 ms\t\t 3.39 MB\t\t 11.6 MB/s\n", - "AnalysisJetsAuxDyn.DFCommonJets_fJvt : 291 ms\t\t 1.67 MB\t\t 5.7 MB/s\n", - "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_NTracks : 291 ms\t\t 1.50 MB\t\t 5.2 MB/s\n", - "AnalysisJetsAuxDyn.GhostMuonSegmentCount : 293 ms\t\t 1.24 MB\t\t 4.2 MB/s\n", - "AnalysisMuonsAuxDyn.muonSegmentLinks : 566 ms\t\t 0.82 MB\t\t 1.5 MB/s\n", - "AnalysisMuonsAuxDyn.msOnlyExtrapolatedMuonSpectrometerTrackParticleLink : 1102 ms\t\t 0.46 MB\t\t 0.4 MB/s\n", - "AnalysisMuonsAuxDyn.extrapolatedMuonSpectrometerTrackParticleLink : 817 ms\t\t 0.44 MB\t\t 
0.5 MB/s\n", - "AnalysisMuonsAuxDyn.inDetTrackParticleLink : 1092 ms\t\t 0.44 MB\t\t 0.4 MB/s\n", - "AnalysisMuonsAuxDyn.muonSpectrometerTrackParticleLink : 819 ms\t\t 0.44 MB\t\t 0.5 MB/s\n", - "AnalysisMuonsAuxDyn.momentumBalanceSignificance : 454 ms\t\t 0.77 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.topoetcone20_CloseByCorr : 453 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.scatteringCurvatureSignificance : 452 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.scatteringNeighbourSignificance : 454 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.neflowisol20_CloseByCorr : 455 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.topoetcone20 : 458 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.topoetcone30 : 452 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.topoetcone40 : 453 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.neflowisol20 : 451 ms\t\t 0.75 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.segmentDeltaEta : 451 ms\t\t 0.75 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.DFCommonJetDr : 449 ms\t\t 0.75 MB\t\t 1.7 MB/s\n", - "AnalysisMuonsAuxDyn.combinedTrackParticleLink : 815 ms\t\t 0.41 MB\t\t 0.5 MB/s\n", - "AnalysisMuonsAuxDyn.InnerDetectorPt : 452 ms\t\t 0.74 MB\t\t 1.6 MB/s\n", - "AnalysisMuonsAuxDyn.MuonSpectrometerPt : 450 ms\t\t 0.74 MB\t\t 1.6 MB/s\n", - "AnalysisMuonsAuxDyn.clusterLink : 819 ms\t\t 0.39 MB\t\t 0.5 MB/s\n", - "AnalysisMuonsAuxDyn.spectrometerFieldIntegral : 450 ms\t\t 0.72 MB\t\t 1.6 MB/s\n", - "AnalysisElectronsAuxDyn.ambiguityLink : 820 ms\t\t 0.39 MB\t\t 0.5 MB/s\n", - "AnalysisMuonsAuxDyn.EnergyLoss : 781 ms\t\t 0.72 MB\t\t 0.9 MB/s\n", - "AnalysisJetsAuxDyn.NNJvtPass : 452 ms\t\t 0.70 MB\t\t 1.6 MB/s\n", - "AnalysisElectronsAuxDyn.topoetcone20_CloseByCorr : 451 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", - "AnalysisElectronsAuxDyn.topoetcone20ptCorrection : 452 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", - "AnalysisElectronsAuxDyn.topoetcone20 : 451 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", - 
"AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt500_CloseByCorr : 453 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", - "AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult : 453 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", - "AnalysisElectronsAuxDyn.neflowisol20 : 451 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", - "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt500 : 450 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", - "AnalysisMuonsAuxDyn.ptcone40 : 453 ms\t\t 0.66 MB\t\t 1.5 MB/s\n", - "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt1000_CloseByCorr : 450 ms\t\t 0.66 MB\t\t 1.5 MB/s\n", - "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt1000 : 457 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", - "AnalysisMuonsAuxDyn.ptvarcone40 : 453 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", + "AnalysisJetsAuxDyn.pt : 341 ms\t\t 5.03 MB\t\t 14.7 MB/s\n", + "AnalysisJetsAuxDyn.eta : 341 ms\t\t 5.25 MB\t\t 15.4 MB/s\n", + "AnalysisJetsAuxDyn.phi : 341 ms\t\t 5.24 MB\t\t 15.4 MB/s\n", + "AnalysisJetsAuxDyn.m : 338 ms\t\t 5.05 MB\t\t 15.0 MB/s\n", + "AnalysisElectronsAuxDyn.pt : 455 ms\t\t 0.67 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.eta : 456 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.phi : 457 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.m : 461 ms\t\t 0.43 MB\t\t 0.9 MB/s\n", + "AnalysisMuonsAuxDyn.pt : 456 ms\t\t 0.74 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.eta : 457 ms\t\t 0.75 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.phi : 459 ms\t\t 0.75 MB\t\t 1.6 MB/s\n", + "AnalysisJetsAuxDyn.EnergyPerSampling : 2277 ms\t\t38.83 MB\t\t 17.0 MB/s\n", + "AnalysisJetsAuxDyn.SumPtTrkPt500 : 2439 ms\t\t30.52 MB\t\t 12.5 MB/s\n", + "AnalysisJetsAuxDyn.TrackWidthPt1000 : 2141 ms\t\t17.00 MB\t\t 7.9 MB/s\n", + "PrimaryVerticesAuxDyn.z : 372 ms\t\t13.11 MB\t\t 35.3 MB/s\n", + "PrimaryVerticesAuxDyn.x : 376 ms\t\t10.82 MB\t\t 28.8 MB/s\n", + "PrimaryVerticesAuxDyn.y : 378 ms\t\t10.53 MB\t\t 27.8 MB/s\n", + "AnalysisJetsAuxDyn.NumTrkPt500 : 2412 ms\t\t10.36 MB\t\t 4.3 MB/s\n", + 
"AnalysisJetsAuxDyn.NumTrkPt1000 : 2120 ms\t\t 7.63 MB\t\t 3.6 MB/s\n", + "AnalysisJetsAuxDyn.SumPtChargedPFOPt500 : 2071 ms\t\t 5.96 MB\t\t 2.9 MB/s\n", + "AnalysisJetsAuxDyn.Timing : 341 ms\t\t 5.31 MB\t\t 15.6 MB/s\n", + "AnalysisJetsAuxDyn.JetConstitScaleMomentum_eta : 338 ms\t\t 5.26 MB\t\t 15.5 MB/s\n", + "AnalysisJetsAuxDyn.ActiveArea4vec_eta : 336 ms\t\t 5.25 MB\t\t 15.6 MB/s\n", + "AnalysisJetsAuxDyn.DetectorEta : 336 ms\t\t 5.25 MB\t\t 15.6 MB/s\n", + "AnalysisJetsAuxDyn.JetConstitScaleMomentum_phi : 336 ms\t\t 5.25 MB\t\t 15.6 MB/s\n", + "AnalysisJetsAuxDyn.ActiveArea4vec_phi : 559 ms\t\t 5.24 MB\t\t 9.4 MB/s\n", + "AnalysisJetsAuxDyn.JetConstitScaleMomentum_m : 338 ms\t\t 5.13 MB\t\t 15.2 MB/s\n", + "AnalysisJetsAuxDyn.JetConstitScaleMomentum_pt : 336 ms\t\t 5.07 MB\t\t 15.1 MB/s\n", + "AnalysisJetsAuxDyn.EMFrac : 338 ms\t\t 5.04 MB\t\t 14.9 MB/s\n", + "AnalysisJetsAuxDyn.Width : 341 ms\t\t 5.01 MB\t\t 14.7 MB/s\n", + "AnalysisJetsAuxDyn.ActiveArea4vec_m : 337 ms\t\t 4.90 MB\t\t 14.5 MB/s\n", + "AnalysisJetsAuxDyn.ActiveArea4vec_pt : 336 ms\t\t 4.85 MB\t\t 14.5 MB/s\n", + "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksWidth : 340 ms\t\t 3.78 MB\t\t 11.1 MB/s\n", + "AnalysisJetsAuxDyn.PSFrac : 340 ms\t\t 3.67 MB\t\t 10.8 MB/s\n", + "AnalysisJetsAuxDyn.JVFCorr : 336 ms\t\t 3.49 MB\t\t 10.4 MB/s\n", + "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksC1 : 338 ms\t\t 3.39 MB\t\t 10.0 MB/s\n", + "AnalysisJetsAuxDyn.DFCommonJets_fJvt : 335 ms\t\t 1.67 MB\t\t 5.0 MB/s\n", + "AnalysisJetsAuxDyn.DFCommonJets_QGTagger_NTracks : 338 ms\t\t 1.50 MB\t\t 4.4 MB/s\n", + "AnalysisJetsAuxDyn.GhostMuonSegmentCount : 342 ms\t\t 1.24 MB\t\t 3.6 MB/s\n", + "AnalysisMuonsAuxDyn.muonSegmentLinks : 662 ms\t\t 0.82 MB\t\t 1.2 MB/s\n", + "AnalysisMuonsAuxDyn.msOnlyExtrapolatedMuonSpectrometerTrackParticleLink : 819 ms\t\t 0.46 MB\t\t 0.6 MB/s\n", + "AnalysisMuonsAuxDyn.extrapolatedMuonSpectrometerTrackParticleLink : 815 ms\t\t 0.44 MB\t\t 0.5 MB/s\n", + 
"AnalysisMuonsAuxDyn.inDetTrackParticleLink : 809 ms\t\t 0.44 MB\t\t 0.5 MB/s\n", + "AnalysisMuonsAuxDyn.muonSpectrometerTrackParticleLink : 811 ms\t\t 0.44 MB\t\t 0.5 MB/s\n", + "AnalysisMuonsAuxDyn.momentumBalanceSignificance : 457 ms\t\t 0.77 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.topoetcone20_CloseByCorr : 457 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.scatteringCurvatureSignificance : 456 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.scatteringNeighbourSignificance : 457 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.neflowisol20_CloseByCorr : 460 ms\t\t 0.76 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.topoetcone20 : 457 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.topoetcone30 : 456 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.topoetcone40 : 457 ms\t\t 0.76 MB\t\t 1.7 MB/s\n", + "AnalysisMuonsAuxDyn.neflowisol20 : 461 ms\t\t 0.75 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.segmentDeltaEta : 459 ms\t\t 0.75 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.DFCommonJetDr : 456 ms\t\t 0.75 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.combinedTrackParticleLink : 811 ms\t\t 0.41 MB\t\t 0.5 MB/s\n", + "AnalysisMuonsAuxDyn.InnerDetectorPt : 460 ms\t\t 0.74 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.MuonSpectrometerPt : 460 ms\t\t 0.74 MB\t\t 1.6 MB/s\n", + "AnalysisMuonsAuxDyn.clusterLink : 811 ms\t\t 0.39 MB\t\t 0.5 MB/s\n", + "AnalysisMuonsAuxDyn.spectrometerFieldIntegral : 456 ms\t\t 0.72 MB\t\t 1.6 MB/s\n", + "AnalysisElectronsAuxDyn.ambiguityLink : 808 ms\t\t 0.39 MB\t\t 0.5 MB/s\n", + "AnalysisMuonsAuxDyn.EnergyLoss : 460 ms\t\t 0.72 MB\t\t 1.6 MB/s\n", + "AnalysisJetsAuxDyn.NNJvtPass : 462 ms\t\t 0.70 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.topoetcone20_CloseByCorr : 685 ms\t\t 0.69 MB\t\t 1.0 MB/s\n", + "AnalysisElectronsAuxDyn.topoetcone20ptCorrection : 462 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.topoetcone20 : 457 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", + 
"AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt500_CloseByCorr : 463 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult : 455 ms\t\t 0.69 MB\t\t 1.5 MB/s\n", + "AnalysisElectronsAuxDyn.neflowisol20 : 460 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", + "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt500 : 458 ms\t\t 0.68 MB\t\t 1.5 MB/s\n", + "AnalysisMuonsAuxDyn.ptcone40 : 458 ms\t\t 0.66 MB\t\t 1.4 MB/s\n", + "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt1000_CloseByCorr : 458 ms\t\t 0.66 MB\t\t 1.4 MB/s\n", + "AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt1000 : 682 ms\t\t 0.65 MB\t\t 1.0 MB/s\n", + "AnalysisMuonsAuxDyn.ptvarcone40 : 455 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", "AnalysisElectronsAuxDyn.f1 : 455 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", - "AnalysisMuonsAuxDyn.ptcone20_Nonprompt_All_MaxWeightTTVA_pt500 : 453 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", - "PrimaryVerticesAuxDyn.vertexType : 288 ms\t\t 0.64 MB\t\t 2.2 MB/s\n", - "AnalysisMuonsAuxDyn.ptvarcone30 : 452 ms\t\t 0.64 MB\t\t 1.4 MB/s\n", - "AnalysisMuonsAuxDyn.ptcone30 : 455 ms\t\t 0.64 MB\t\t 1.4 MB/s\n", - "AnalysisMuonsAuxDyn.ptcone20_Nonprompt_All_MaxWeightTTVA_pt1000 : 453 ms\t\t 0.63 MB\t\t 1.4 MB/s\n", - "AnalysisElectronsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVALooseCone_pt500 : 452 ms\t\t 0.61 MB\t\t 1.4 MB/s\n", - "AnalysisMuonsAuxDyn.CaloLRLikelihood : 450 ms\t\t 0.61 MB\t\t 1.4 MB/s\n" + "AnalysisMuonsAuxDyn.ptcone20_Nonprompt_All_MaxWeightTTVA_pt500 : 460 ms\t\t 0.65 MB\t\t 1.4 MB/s\n", + "PrimaryVerticesAuxDyn.vertexType : 337 ms\t\t 0.64 MB\t\t 1.9 MB/s\n", + "AnalysisMuonsAuxDyn.ptvarcone30 : 456 ms\t\t 0.64 MB\t\t 1.4 MB/s\n", + "AnalysisMuonsAuxDyn.ptcone30 : 457 ms\t\t 0.64 MB\t\t 1.4 MB/s\n", + "AnalysisMuonsAuxDyn.ptcone20_Nonprompt_All_MaxWeightTTVA_pt1000 : 458 ms\t\t 0.63 MB\t\t 1.4 MB/s\n", + "AnalysisElectronsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVALooseCone_pt500 : 454 ms\t\t 0.61 MB\t\t 1.3 
MB/s\n", + "AnalysisMuonsAuxDyn.CaloLRLikelihood : 457 ms\t\t 0.61 MB\t\t 1.3 MB/s\n" ] } ], @@ -703,7 +764,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "55e321f6-3ae6-4af3-a9e3-b7e5e469cffa", "metadata": {}, "outputs": [ @@ -712,19 +773,19 @@ "output_type": "stream", "text": [ "reading 12 branch(es)\n", - " - read 64.08 MB in 6.76 s\n", + " - read 64.08 MB in 7.04 s\n", "reading 24 branch(es)\n", - " - read 191.08 MB in 19.10 s\n", + " - read 191.08 MB in 20.51 s\n", "reading 36 branch(es)\n", - " - read 245.91 MB in 22.42 s\n", + " - read 245.91 MB in 24.55 s\n", "reading 48 branch(es)\n", - " - read 257.35 MB in 30.04 s\n", + " - read 257.35 MB in 32.44 s\n", "reading 60 branch(es)\n", - " - read 266.31 MB in 35.72 s\n", + " - read 266.31 MB in 39.86 s\n", "reading 72 branch(es)\n", - " - read 274.57 MB in 42.14 s\n", + " - read 274.57 MB in 45.74 s\n", "reading 82 branch(es)\n", - " - read 280.93 MB in 48.16 s\n" + " - read 280.93 MB in 50.45 s\n" ] } ], @@ -759,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "ed8b860f-5df8-4fad-b405-8c8d2e7fc871", "metadata": {}, "outputs": [ @@ -767,13 +828,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "fraction read: 5.70% in 6.76 s\n", - "fraction read: 17.01% in 19.10 s\n", - "fraction read: 21.89% in 22.42 s\n", - "fraction read: 22.91% in 30.04 s\n", - "fraction read: 23.70% in 35.72 s\n", - "fraction read: 24.44% in 42.14 s\n", - "fraction read: 25.01% in 48.16 s\n" + "fraction read: 5.70% in 7.04 s\n", + "fraction read: 17.01% in 20.51 s\n", + "fraction read: 21.89% in 24.55 s\n", + "fraction read: 22.91% in 32.44 s\n", + "fraction read: 23.70% in 39.86 s\n", + "fraction read: 24.44% in 45.74 s\n", + "fraction read: 25.01% in 50.45 s\n" ] } ], @@ -784,25 +845,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "0f10517f-c2bf-4155-a829-2123a828386e", "metadata": {}, "outputs": [ { 
"data": { + "image/png": "", "text/plain": [ - "(0.0, 19.676398102626532)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -812,7 +863,7 @@ "source": [ "event_rate_in_kHz = nevts / np.fromiter(time_per_fraction_read.values(), np.float32) / 1_000\n", "\n", - "fig, (ax0, ax1) = plt.subplots(figsize=(12,4), ncols=2)\n", + "fig, (ax0, ax1) = plt.subplots(figsize=(11,4), ncols=2)\n", "\n", "ax0.plot(time_per_fraction_read.keys(), time_per_fraction_read.values(), \"o\")\n", "ax0.set_xlabel(\"fraction read\")\n", @@ -824,12 +875,15 @@ "ax1.set_xlabel(\"fraction read\")\n", "ax1.set_ylabel(\"event rate [kHz]\")\n", "ax1.set_xlim([0, ax1.get_xlim()[1]])\n", - "ax1.set_ylim([0, ax1.get_ylim()[1]])" + "ax1.set_ylim([0, ax1.get_ylim()[1]])\n", + "\n", + "fig.tight_layout()\n", + "fig.savefig(figures_dir / \"file_read_time.png\", dpi=300)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "ba6889dc-52a3-40d1-8e65-f8317437035f", "metadata": {}, "outputs": [ @@ -849,23 +903,13 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "3d41622a-a7ba-41d6-a818-d1147e1f9213", "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "(0.0, 89.82277641296386)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -884,7 +928,9 @@ "ax.set_xlabel(\"fraction read\")\n", "ax.set_ylabel(\"rate [Mbps]\")\n", "ax.set_xlim([0, ax.get_xlim()[1]])\n", - "ax.set_ylim([0, ax.get_ylim()[1]])" + "ax.set_ylim([0, ax.get_ylim()[1]])\n", + "\n", + "fig.savefig(figures_dir / \"file_read_rate.png\", dpi=300)" ] } ], diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7050ca5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.jupytext] +# Always pair ipynb notebooks to py:percent files +formats = ["ipynb", "py:percent"] +notebook_metadata_filter = "all,-jupytext.text_representation.jupytext_version,-language_info.version" diff --git a/requirements.txt b/requirements.txt index a4ab75a..ccc2296 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,6 @@ snakeviz # What was needed on UChicago jupyter # func_adl_servicex_xaodr21>=2.0a1 + +# developer tools +jupytext