Skip to content

Commit 629d7d5

Browse files
committed Mar 5, 2025
remove upset diff plot
The upsetplot package is incompatible with numpy v2 and pandas v2, both of which are required for lyscripts v1. To reproduce the upset diff plot, one therefore has to go back a little in the git history.
1 parent ee15c4c commit 629d7d5

File tree

7 files changed

+96
-228
lines changed

7 files changed

+96
-228
lines changed
 
1 Byte
Loading

‎dvc.lock

+49-32
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ stages:
167167
size: 21351
168168
- path: scripts/render.py
169169
hash: md5
170-
md5: c9d7f0af89624327880cc89d3ecbc291
171-
size: 1986
170+
md5: 097a0bf7d0a39981ee0b4f403bd534fc
171+
size: 2096
172172
outs:
173173
- path: 2021-clb-oropharynx/README.md
174174
hash: md5
@@ -192,8 +192,8 @@ stages:
192192
size: 28008
193193
- path: scripts/render.py
194194
hash: md5
195-
md5: c9d7f0af89624327880cc89d3ecbc291
196-
size: 1986
195+
md5: 097a0bf7d0a39981ee0b4f403bd534fc
196+
size: 2096
197197
outs:
198198
- path: 2023-clb-multisite/README.md
199199
hash: md5
@@ -217,8 +217,8 @@ stages:
217217
size: 52984
218218
- path: scripts/render.py
219219
hash: md5
220-
md5: c9d7f0af89624327880cc89d3ecbc291
221-
size: 1986
220+
md5: 097a0bf7d0a39981ee0b4f403bd534fc
221+
size: 2096
222222
outs:
223223
- path: 2023-isb-multisite/README.md
224224
hash: md5
@@ -233,8 +233,8 @@ stages:
233233
size: 166966
234234
- path: scripts/t_category.py
235235
hash: md5
236-
md5: 3e3c62e6daa7a90f7f4e40d90d00b0f4
237-
size: 1622
236+
md5: cf119e4a12ccd9a42cd55ac3bc863c4b
237+
size: 1690
238238
plot-t-category@2021-clb-oropharynx:
239239
cmd: "python scripts/t_category.py 2021-clb-oropharynx/data.csv\n"
240240
deps:
@@ -244,8 +244,8 @@ stages:
244244
size: 92744
245245
- path: scripts/t_category.py
246246
hash: md5
247-
md5: 3e3c62e6daa7a90f7f4e40d90d00b0f4
248-
size: 1622
247+
md5: cf119e4a12ccd9a42cd55ac3bc863c4b
248+
size: 1690
249249
plot-t-category@2023-clb-multisite:
250250
cmd: "python scripts/t_category.py 2023-clb-multisite/data.csv\n"
251251
deps:
@@ -255,8 +255,8 @@ stages:
255255
size: 111721
256256
- path: scripts/t_category.py
257257
hash: md5
258-
md5: 3e3c62e6daa7a90f7f4e40d90d00b0f4
259-
size: 1622
258+
md5: cf119e4a12ccd9a42cd55ac3bc863c4b
259+
size: 1690
260260
plot-t-category@2023-isb-multisite:
261261
cmd: "python scripts/t_category.py 2023-isb-multisite/data.csv\n"
262262
deps:
@@ -266,8 +266,8 @@ stages:
266266
size: 204462
267267
- path: scripts/t_category.py
268268
hash: md5
269-
md5: 3e3c62e6daa7a90f7f4e40d90d00b0f4
270-
size: 1622
269+
md5: cf119e4a12ccd9a42cd55ac3bc863c4b
270+
size: 1690
271271
plot-t-category@2025-hvh-oropharynx:
272272
cmd: "python scripts/t_category.py 2025-hvh-oropharynx/data.csv\n"
273273
deps:
@@ -277,8 +277,8 @@ stages:
277277
size: 82567
278278
- path: scripts/t_category.py
279279
hash: md5
280-
md5: 3e3c62e6daa7a90f7f4e40d90d00b0f4
281-
size: 1622
280+
md5: cf119e4a12ccd9a42cd55ac3bc863c4b
281+
size: 1690
282282
plot-age-and-sex@2021-usz-oropharynx:
283283
cmd: "python scripts/age_and_sex.py 2021-usz-oropharynx/data.csv\n"
284284
deps:
@@ -288,8 +288,8 @@ stages:
288288
size: 166966
289289
- path: scripts/age_and_sex.py
290290
hash: md5
291-
md5: fba372b310fd04a38564b4f9dab808a9
292-
size: 3160
291+
md5: 9fc68d3a1f95e03d7262ef3225b28a32
292+
size: 3252
293293
plot-age-and-sex@2021-clb-oropharynx:
294294
cmd: "python scripts/age_and_sex.py 2021-clb-oropharynx/data.csv\n"
295295
deps:
@@ -299,8 +299,8 @@ stages:
299299
size: 92744
300300
- path: scripts/age_and_sex.py
301301
hash: md5
302-
md5: fba372b310fd04a38564b4f9dab808a9
303-
size: 3160
302+
md5: 9fc68d3a1f95e03d7262ef3225b28a32
303+
size: 3252
304304
plot-age-and-sex@2023-clb-multisite:
305305
cmd: "python scripts/age_and_sex.py 2023-clb-multisite/data.csv\n"
306306
deps:
@@ -310,8 +310,8 @@ stages:
310310
size: 111721
311311
- path: scripts/age_and_sex.py
312312
hash: md5
313-
md5: fba372b310fd04a38564b4f9dab808a9
314-
size: 3160
313+
md5: 9fc68d3a1f95e03d7262ef3225b28a32
314+
size: 3252
315315
plot-age-and-sex@2023-isb-multisite:
316316
cmd: "python scripts/age_and_sex.py 2023-isb-multisite/data.csv\n"
317317
deps:
@@ -321,8 +321,8 @@ stages:
321321
size: 204462
322322
- path: scripts/age_and_sex.py
323323
hash: md5
324-
md5: fba372b310fd04a38564b4f9dab808a9
325-
size: 3160
324+
md5: 9fc68d3a1f95e03d7262ef3225b28a32
325+
size: 3252
326326
plot-age-and-sex@2025-hvh-oropharynx:
327327
cmd: "python scripts/age_and_sex.py 2025-hvh-oropharynx/data.csv\n"
328328
deps:
@@ -332,8 +332,8 @@ stages:
332332
size: 82567
333333
- path: scripts/age_and_sex.py
334334
hash: md5
335-
md5: fba372b310fd04a38564b4f9dab808a9
336-
size: 3160
335+
md5: 9fc68d3a1f95e03d7262ef3225b28a32
336+
size: 3252
337337
plot-subsite@2021-usz-oropharynx:
338338
cmd: "python scripts/subsite.py --data 2021-usz-oropharynx/data.csv\n"
339339
deps:
@@ -398,8 +398,8 @@ stages:
398398
size: 92744
399399
- path: scripts/bar_plot.py
400400
hash: md5
401-
md5: 094dfad6e1e55f49472dac08829ef089
402-
size: 2577
401+
md5: 16bb00580dbb46db7f52f6f9a3f59276
402+
size: 2628
403403
bar-plot@2023-clb-multisite:
404404
cmd: python scripts/bar_plot.py 2023-clb-multisite/data.csv
405405
deps:
@@ -409,8 +409,8 @@ stages:
409409
size: 111721
410410
- path: scripts/bar_plot.py
411411
hash: md5
412-
md5: 094dfad6e1e55f49472dac08829ef089
413-
size: 2577
412+
md5: 16bb00580dbb46db7f52f6f9a3f59276
413+
size: 2628
414414
bar-plot@2023-isb-multisite:
415415
cmd: python scripts/bar_plot.py 2023-isb-multisite/data.csv
416416
deps:
@@ -420,5 +420,22 @@ stages:
420420
size: 204462
421421
- path: scripts/bar_plot.py
422422
hash: md5
423-
md5: 094dfad6e1e55f49472dac08829ef089
424-
size: 2577
423+
md5: 16bb00580dbb46db7f52f6f9a3f59276
424+
size: 2628
425+
factors-diff@2021-usz-oropharynx-2025-hvh-oropharynx:
426+
cmd: "python scripts/factors_diff.py --first-dataset 2021-usz-oropharynx --second-dataset
427+
2025-hvh-oropharynx --commit ee15c4cabdd160a10783fcf85ffe1cbfeb5c4826 --repo
428+
rmnldwg/lydata.private\n"
429+
deps:
430+
- path: 2021-usz-oropharynx/data.csv
431+
hash: md5
432+
md5: a9e4043ed6a273c609fa16523bcae455
433+
size: 166966
434+
- path: 2025-hvh-oropharynx/data.csv
435+
hash: md5
436+
md5: 7baa8021efbff0126fcf297e52106168
437+
size: 82567
438+
- path: scripts/factors_diff.py
439+
hash: md5
440+
md5: 182812f0fe01deba4fd1669153b88ae7
441+
size: 3775

‎dvc.yaml

+2-15
Original file line numberDiff line numberDiff line change
@@ -183,20 +183,6 @@ stages:
183183
cmd:
184184
python scripts/bar_plot.py ${item}/data.csv
185185

186-
upset-diff:
187-
matrix:
188-
first: [2021-usz-oropharynx]
189-
second: [2025-hvh-oropharynx]
190-
cmd: >
191-
python scripts/upset_diff.py
192-
--first-dataset ${item.first}
193-
--second-dataset ${item.second}
194-
--commit eb8a38ec56269d8c8f23d7155ecaad93f399b5f4
195-
deps:
196-
- ${item.first}/data.csv
197-
- ${item.second}/data.csv
198-
- scripts/upset_diff.py
199-
200186
factors-diff:
201187
matrix:
202188
first: [2021-usz-oropharynx]
@@ -205,7 +191,8 @@ stages:
205191
python scripts/factors_diff.py
206192
--first-dataset ${item.first}
207193
--second-dataset ${item.second}
208-
--commit eb8a38ec56269d8c8f23d7155ecaad93f399b5f4
194+
--commit ee15c4cabdd160a10783fcf85ffe1cbfeb5c4826
195+
--repo rmnldwg/lydata.private
209196
deps:
210197
- ${item.first}/data.csv
211198
- ${item.second}/data.csv

‎requirements.in

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
2-
numpy == 2.2.2
1+
numpy == 2.2.3
32
pandas == 2.2.3
43
matplotlib == 3.10.0
54
dvc == 3.59.0
@@ -9,4 +8,3 @@ icd10-cm == 0.0.5
98
lyscripts == 1.0.0.a5
109
lazydocs == 0.4.8
1110
tueplots == 0.0.17
12-
upsetplot == 0.9.0

‎requirements.txt

+3-4
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ numexpr==2.10.2
401401
# via
402402
# blosc2
403403
# tables
404-
numpy==2.2.2
404+
numpy==1.26.4
405405
# via
406406
# -r requirements.in
407407
# blosc2
@@ -444,15 +444,15 @@ packaging==24.2
444444
# nbconvert
445445
# pandera
446446
# tables
447-
pandas==2.2.3
447+
pandas==1.5.3
448448
# via
449449
# -r requirements.in
450450
# lydata
451451
# lymph-model
452452
# lyscripts
453453
# pandera
454454
# upsetplot
455-
pandera==0.23.0
455+
pandera==0.22.1
456456
# via lydata
457457
pandocfilters==1.5.1
458458
# via nbconvert
@@ -727,7 +727,6 @@ tzdata==2025.1
727727
# via
728728
# celery
729729
# kombu
730-
# pandas
731730
upsetplot==0.9.0
732731
# via -r requirements.in
733732
uri-template==1.3.0

‎scripts/factors_diff.py

+41-2
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,58 @@
11
"""Plot the difference of clinicopathological factors in two datasets as a bar plot."""
22

3+
import argparse
34
from pathlib import Path
45

56
import matplotlib.pyplot as plt
67
from lyscripts.plots import COLORS
78
from shared import MPLSTYLE
89
from tueplots import figsizes, fontsizes
9-
from upset_diff import get_parser, kwargs_from_option
1010

1111
import lydata
1212
from lydata import C
1313

1414
OUTPUT_NAME = Path(__file__).with_suffix(".png").name
1515

1616

17+
def get_parser() -> argparse.ArgumentParser:
    """Return the argument parser.

    Every option is optional and falls back to the default given below, so
    the script can be run without any arguments.

    Returns:
        A parser exposing ``--repo``, ``--commit``, ``--first-dataset`` and
        ``--second-dataset``; all four values are parsed as strings.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--repo",
        type=str,
        help="The repository from which to load the datasets.",
        default="rmnldwg/lydata",
    )
    parser.add_argument(
        "--commit",
        type=str,
        help="The commit hash at which to compare the datasets.",
        default="5b85184ecece020f509ab0c9f05aa5c81257ffd3",
    )
    # The two dataset options previously had no help text, which left
    # ``--help`` silent about the expected naming scheme.
    parser.add_argument(
        "--first-dataset",
        type=str,
        help="Name of the first dataset, as <year>-<institution>-<subsite>.",
        default="2021-usz-oropharynx",
    )
    parser.add_argument(
        "--second-dataset",
        type=str,
        help="Name of the second dataset, as <year>-<institution>-<subsite>.",
        default="2025-hvh-oropharynx",
    )
    return parser
43+
44+
45+
def kwargs_from_option(option: str) -> dict:
    """Return the load_kwargs for the given option.

    The option is split on ``"-"`` into exactly three parts that are
    interpreted, in order, as year, institution and subsite.

    Args:
        option: Dataset name such as ``"2021-usz-oropharynx"``.

    Returns:
        A dict with the keys ``"year"`` (converted to ``int``),
        ``"institution"``, ``"subsite"`` and ``"use_github"`` (always
        ``True``).

    Raises:
        ValueError: If the option does not consist of exactly three
            dash-separated parts, or if the year part is not an integer.
    """
    parts = option.split("-")
    # Check the shape explicitly: a bare tuple unpacking would only report
    # "not enough/too many values to unpack" without naming the bad option.
    if len(parts) != 3:
        raise ValueError(
            "Expected a dataset name of the form "
            f"'<year>-<institution>-<subsite>', got {option!r}."
        )
    year, institution, subsite = parts

    try:
        year_number = int(year)
    except ValueError as err:
        raise ValueError(
            f"Year {year!r} of dataset name {option!r} is not an integer."
        ) from err

    return {
        "year": year_number,
        "institution": institution,
        "subsite": subsite,
        "use_github": True,
    }
54+
55+
1756
def create_ax() -> plt.Axes:
1857
"""Create the axis for the plot."""
1958
plt.style.use(MPLSTYLE)
@@ -28,7 +67,7 @@ def main() -> None:
2867
args = get_parser().parse_args()
2968

3069
load_kwargs = {
31-
"repo": "rmnldwg/lydata",
70+
"repo_name": args.repo,
3271
"ref": args.commit,
3372
}
3473

‎scripts/upset_diff.py

-172
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.