diff --git a/cmsdb/campaigns/run3_2022_postEE_nano_v12/data.py b/cmsdb/campaigns/run3_2022_postEE_nano_v12/data.py
index e343dcb6..63d3efe8 100644
--- a/cmsdb/campaigns/run3_2022_postEE_nano_v12/data.py
+++ b/cmsdb/campaigns/run3_2022_postEE_nano_v12/data.py
@@ -4,6 +4,8 @@
 CMS datasets from the 2022 post-EE data-taking campaign
 """
 
+from order import DatasetInfo
+
 import cmsdb.processes as procs
 
 from cmsdb.campaigns.run3_2022_postEE_nano_v12 import campaign_run3_2022_postEE_nano_v12 as cpn
@@ -120,11 +122,18 @@
     id=14783435,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022E-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=29,
-    n_events=12873327,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022E-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [],
+            },
+            n_files=29,  # 29-0
+            n_events=12873327,
+        ),
+    ),
     aux={
         "era": "E",
     },
@@ -135,11 +144,21 @@
     id=14784482,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022F-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=95,
-    n_events=38219969,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022F-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [
+                    "/store/data/Run2022F/MuonEG/NANOAOD/22Sep2023-v1/50000/4d76213a-ef14-411a-9558-559a6df3f978.root",  # empty  # noqa: E501
+                    "/store/data/Run2022F/MuonEG/NANOAOD/22Sep2023-v1/50000/4fb72196-3b02-4499-8f6c-a54e15692b32.root",  # empty  # noqa: E501
+                ],
+            },
+            n_files=93,  # 95-2
+            n_events=38219969,
+        ),
+    ),
     aux={
         "era": "F",
     },
@@ -150,11 +169,20 @@
     id=14784485,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022G-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=27,
-    n_events=6238527,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022G-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [
+                    "/store/data/Run2022G/MuonEG/NANOAOD/22Sep2023-v1/2520000/cd404eb6-8218-4787-b5ed-af6cd9fe3750.root",  # empty  # noqa: E501
+                ],
+            },
+            n_files=26,  # 27-1
+            n_events=6238527,
+        ),
+    ),
     aux={
         "era": "G",
     },
diff --git a/cmsdb/campaigns/run3_2022_preEE_nano_v12/data.py b/cmsdb/campaigns/run3_2022_preEE_nano_v12/data.py
index 8e0c414f..b87b07dd 100644
--- a/cmsdb/campaigns/run3_2022_preEE_nano_v12/data.py
+++ b/cmsdb/campaigns/run3_2022_preEE_nano_v12/data.py
@@ -4,6 +4,8 @@
 CMS datasets from the 2022 pre-EE data-taking campaign
 """
 
+from order import DatasetInfo
+
 import cmsdb.processes as procs
 
 from cmsdb.campaigns.run3_2022_preEE_nano_v12 import campaign_run3_2022_preEE_nano_v12 as cpn
@@ -120,11 +122,20 @@
     id=14783289,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022A-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=5,
-    n_events=12,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022A-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [
+                    "/store/data/Run2022A/MuonEG/NANOAOD/22Sep2023-v1/50000/9a127bdb-9522-4f49-b754-67bb9152c0b3.root",  # empty  # noqa: E501
+                ],
+            },
+            n_files=4,  # 5-1
+            n_events=12,
+        ),
+    ),
     aux={
         "era": "A",
     },
@@ -135,11 +146,20 @@
     id=14784076,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022B-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=7,
-    n_events=254803,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022B-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [
+                    "/store/data/Run2022B/MuonEG/NANOAOD/22Sep2023-v1/50000/947809ff-822e-4a3a-84a2-d3fe84fc2573.root",  # empty  # noqa: E501
+                ],
+            },
+            n_files=6,  # 7-1
+            n_events=254803,
+        ),
+    ),
     aux={
         "era": "B",
     },
@@ -150,11 +170,18 @@
     id=14784125,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022C-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=28,
-    n_events=15768439,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022C-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [],
+            },
+            n_files=28,  # 28-0
+            n_events=15768439,
+        ),
+    ),
     aux={
         "era": "C",
     },
@@ -165,11 +192,18 @@
     id=14784209,
     is_data=True,
     processes=[procs.data_muoneg],
-    keys=[
-        "/MuonEG/Run2022D-22Sep2023-v1/NANOAOD",  # noqa
-    ],
-    n_files=16,
-    n_events=8007031,
+    info=dict(
+        nominal=DatasetInfo(
+            keys=[
+                "/MuonEG/Run2022D-22Sep2023-v1/NANOAOD",  # noqa: E501
+            ],
+            aux={
+                "broken_files": [],
+            },
+            n_files=16,  # 16-0
+            n_events=8007031,
+        ),
+    ),
     aux={
         "era": "D",
     },
diff --git a/scripts/get_das_info.py b/scripts/get_das_info.py
index ac7ebe74..7733db2b 100644
--- a/scripts/get_das_info.py
+++ b/scripts/get_das_info.py
@@ -26,30 +26,33 @@ def get_generator_name(name: str) -> str:
     return ""
 
 
-def convert_default(data: dict, placeholder="PLACEHOLDER") -> str:
+def get_broken_files_str(data: dict, n_spaces: int = 20) -> str:
     """
-    Function that converts dataset info into one order Dataset per query
+    Function that returns a string representation of broken files
     """
-    generator = get_generator_name(data["name"])
-    return f"""cpn.add_dataset(
-    name="{placeholder}{generator}",
-    id={data['dataset_id']},
-    processes=[procs.{placeholder}],
-    keys=[
-        "{data['name']}",  # noqa
-    ],
-    n_files={data['nfiles']},
-    n_events={data['nevents']},
-)
-"""
+
+    broken_files_list = [
+        f'"{d}",  # broken  # noqa: E501' for d in data["broken_files"]
+    ] + [
+        f'"{d}",  # empty  # noqa: E501' for d in data["empty_files"] if d not in data["broken_files"]
+    ]
+
+    if not broken_files_list:
+        return ""
+    else:
+        return (
+            f"\n{' ' * n_spaces}" +
+            f"\n{' ' * n_spaces}".join(broken_files_list) +
+            f"\n{' ' * (n_spaces - 4)}"
+        )
 
 
-def convert_variation(data: dict, placeholder="PLACEHOLDER") -> str:
+def convert_default(data: dict, placeholder="PLACEHOLDER") -> str:
     """
-    Function that converts dataset info into one order Dataset per query. Stores the dataset info
-    in a dict with the dataset type as key.
+ Function that converts dataset info into one order Dataset per query """ generator = get_generator_name(data["name"]) + return f"""cpn.add_dataset( name="{placeholder}{generator}", id={data['dataset_id']}, @@ -57,9 +60,12 @@ def convert_variation(data: dict, placeholder="PLACEHOLDER") -> str: info=dict( nominal=DatasetInfo( keys=[ - "{data['name']}", # noqa + "{data['name']}", # noqa: E501 ], - n_files={data['nfiles']}, + aux={{ + "broken_files": [{get_broken_files_str(data)}], + }}, + n_files={data['nfiles_good']}, # {data["nfiles"]}-{data["nfiles_bad"]} n_events={data['nevents']}, ), ), @@ -128,9 +134,12 @@ def convert_top(data: dict, placeholder="PLACEHOLDER") -> str: info=dict( nominal=DatasetInfo( keys=[ - "{data['name']}", # noqa + "{data['name']}", # noqa: E501 ], - n_files={data['nfiles']}, + aux={{ + "broken_files": [{get_broken_files_str(data)}], + }}, + n_files={data['nfiles_good']}, # {data["nfiles"]}-{data["nfiles_bad"]} n_events={data['nevents']}, ), ), @@ -139,9 +148,12 @@ def convert_top(data: dict, placeholder="PLACEHOLDER") -> str: # comment out this dataset return f""" # {identifier}=DatasetInfo( # keys=[ - # "{data['name']}", # noqa + # "{data['name']}", # noqa: E501 # ], - # n_files={data['nfiles']}, + # aux={{ + # "broken_files": [{get_broken_files_str(data)}], + # }}, + # n_files={data['nfiles_good']}, # {data["nfiles"]}-{data["nfiles_bad"]} # n_events={data['nevents']}, # ),""" elif dataset_type == "ignore": @@ -150,9 +162,12 @@ def convert_top(data: dict, placeholder="PLACEHOLDER") -> str: # some known variation of the dataset return f""" {dataset_type}=DatasetInfo( keys=[ - "{data['name']}", # noqa + "{data['name']}", # noqa: E501 ], - n_files={data['nfiles']}, + aux={{ + "broken_files": [{get_broken_files_str(data)}], + }}, + n_files={data['nfiles_good']}, # {data["nfiles"]}-{data["nfiles_bad"]} n_events={data['nevents']}, ),""" @@ -168,25 +183,22 @@ def convert_minimal(data: dict) -> str: """ Function that only returns the dataset key + number of events. 
""" - return f"""{data['name']}\nFiles: {data['nfiles']}\nEvents: {data['nevents']}\n""" + return f"""{data['name']}\nFiles: {data['nfiles_good']}\nEvents: {data['nevents']}\n""" convert_functions = { "default": convert_default, - "variation": convert_variation, "keys": convert_keys, "top": convert_top, "minimal": convert_minimal, } -def get_das_info( - dataset: str, -) -> dict: +def load_das_info(dataset: str, add_file_info: bool = False) -> dict: from law.util import interruptable_popen # call dasgoclient command - cmd = f"dasgoclient -query='dataset={dataset}' -json" + cmd = f"dasgoclient -query='{'file ' if add_file_info else ''}dataset={dataset}' -json" code, out, _ = interruptable_popen( cmd, shell=True, @@ -196,16 +208,44 @@ def get_das_info( if code != 0: raise Exception(f"dasgoclient query failed:\n{out}") infos = json.loads(out) + + return infos + + +def get_das_info(dataset: str) -> dict: info_of_interest = {"name": dataset} - for info in infos: - dataset_info = info["dataset"][0] - # Get json format of single das_string gives multiple dictornaries with different info - # Avoid to print multiple infos twice and ask specificly for the kew of interest - if "dataset_info" in info["das"]["services"][0]: - info_of_interest["dataset_id"] = dataset_info.get("dataset_id", "") - elif "filesummaries" in info["das"]["services"][0]: - info_of_interest["nfiles"] = dataset_info.get("nfiles", "") - info_of_interest["nevents"] = dataset_info.get("nevents", "") + + file_infos = load_das_info(dataset, add_file_info=True) + + info_of_interest["dataset_id"] = file_infos[0]["file"][0]["dataset_id"] + + empty_files_filter = lambda info: info["file"][0]["nevents"] == 0 + broken_files_filter = lambda info: info["file"][0]["is_file_valid"] == 0 + + good_files = list(filter(lambda x: not broken_files_filter(x) and not empty_files_filter(x), file_infos)) + + dataset_id = {info["file"][0]["dataset_id"] for info in good_files} + if len(dataset_id) == 1: + info_of_interest["dataset_id"] = dataset_id.pop() + else: + raise ValueError(f"Multiple dataset IDs ({dataset_id}) found for dataset {dataset}") + + info_of_interest["nfiles"] = len(file_infos) + info_of_interest["nfiles_good"] = len(good_files) + info_of_interest["nevents"] = sum(info["file"][0]["nevents"] for info in good_files) + + empty_files = [ + info["file"][0]["name"] + for info in filter(empty_files_filter, file_infos) + ] + broken_files = [ + info["file"][0]["name"] + for info in filter(broken_files_filter, file_infos) + ] + info_of_interest["empty_files"] = empty_files + info_of_interest["broken_files"] = broken_files + + info_of_interest["nfiles_bad"] = len(set(empty_files + broken_files)) return info_of_interest @@ -215,8 +255,6 @@ def print_das_info( keys_of_interest: tuple | None = None, convert_function_str: str | None = None, ): - from law.util import interruptable_popen - # get the requested convert function convert_function = convert_functions[convert_function_str] @@ -224,7 +262,7 @@ def print_das_info( # set default keys of interest # NOTE: this attribute is currently not used keys_of_interest = keys_of_interest or ( - "name", "dataset_id", "nfiles", "nevents", + "name", "dataset_id", "nfiles", "nevents", "empty_files", "broken_files", ) wildcard = "*" in das_string @@ -234,16 +272,7 @@ def print_das_info( datasets.append(das_string) else: # using a wildcard leads to a different structer in json format - cmd = f"dasgoclient -query='dataset={das_string}' -json" - code, out, _ = interruptable_popen( - cmd, - shell=True, - 
-            stdout=subprocess.PIPE,
-            executable="/bin/bash",
-        )
-        if code != 0:
-            raise Exception(f"dasgoclient query failed:\n{out}")
-        infos = json.loads(out)
+        infos = load_das_info(das_string, add_file_info=False)
         for info in infos:
             dataset_name = info.get("dataset", [])[0].get("name", "")
             datasets.append(dataset_name)
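
Usage note (not part of the patch above): a minimal sketch of how the refactored helpers compose. It assumes scripts/get_das_info.py is on the import path and that dasgoclient is available in the environment; the dataset key is taken from the MuonEG era G stanza in the diff.

    # Query DAS at file granularity: get_das_info() separates good files from
    # broken/empty ones and records the bad paths under "broken_files".
    from get_das_info import get_das_info, convert_default

    data = get_das_info("/MuonEG/Run2022G-22Sep2023-v1/NANOAOD")

    # n_files counts only the good files (hence the "27-1" style comments in
    # the diff); the rendered stanza lists bad files in aux["broken_files"].
    print(convert_default(data))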