fix: Removed unnecessary try-except statement
da-the-dev committed Jul 25, 2024
1 parent dc0e462 commit 4225b16
Showing 1 changed file with 51 additions and 57 deletions.
108 changes: 51 additions & 57 deletions src/data.py
@@ -24,63 +24,57 @@ def sample_data(cfg: DictConfig):
     The function to sample the data from the given URL and save it to the sample path.
     Returns both the sampled data and the updated configuration settings without updating the real config files.
     """
-    try:
-        datastore_path = cfg.data.datastore_path
-
-        # Create datastore directory if not exists
-        Path(datastore_path).parent.mkdir(exist_ok=True, parents=True)
-
-        # Check if the source data is available, if not download it.
-        if not os.path.exists(datastore_path):
-            print("Downloading data from: ", cfg.data.url)
-            gdown.download(cfg.data.url, datastore_path, quiet=False, use_cookies=False)
-
-        # Determine the total number of rows in the file without loading it entirely
-        total_rows = sum(1 for row in open(datastore_path, "r")) - 1  # Exclude header
-
-        # Calculate the sample size
-        sample_size = math.ceil(total_rows * cfg.data.sample_size)
-
-        # Determine the start row for sampling
-        start_row = (
-            0
-            if cfg.data.last_included_row_number < 0
-            else (cfg.data.last_included_row_number + 1) % total_rows
-        )
-
-        # If the start_row + sample_size exceeds total_rows, adjust the sample size
-        if start_row + sample_size > total_rows:
-            sample_size = total_rows - start_row
-
-        # Load only the necessary rows into memory
-        skiprows = range(
-            1, start_row + 1
-        )  # Skip rows before the start_row, keeping header
-        nrows = sample_size  # Number of rows to read
-        data = pd.read_csv(datastore_path, skiprows=skiprows, nrows=nrows)
-
-        print("Sampling data...")
-        resulted_sample = data
-
-        # Create a deep copy of cfg to modify without affecting the original
-        updated_cfg = copy.deepcopy(cfg)
-
-        # Update the configuration for last included row number in the copy
-        new_last_included_row_number = start_row + sample_size - 1
-        updated_cfg.data.last_included_row_number = (
-            new_last_included_row_number % total_rows
-        )
-
-        # Increment and update the data version in the copy
-        new_version = f"v{updated_cfg.data.version_number + 1}.0"
-        updated_cfg.data.data_version = new_version
-        updated_cfg.data.version_number = updated_cfg.data.version_number + 1
-
-        # Return both the sampled data and the updated configuration
-        return resulted_sample, updated_cfg
-    except Exception as e:
-        print("Error in loading or sampling the data: ", e)
-        return None, cfg
+    datastore_path = cfg.data.datastore_path
+
+    # Create datastore directory if not exists
+    Path(datastore_path).parent.mkdir(exist_ok=True, parents=True)
+
+    # Check if the source data is available, if not download it.
+    if not os.path.exists(datastore_path):
+        print("Downloading data from: ", cfg.data.url)
+        gdown.download(cfg.data.url, datastore_path, quiet=False, use_cookies=False)
+
+    # Determine the total number of rows in the file without loading it entirely
+    total_rows = sum(1 for row in open(datastore_path, "r")) - 1  # Exclude header
+
+    # Calculate the sample size
+    sample_size = math.ceil(total_rows * cfg.data.sample_size)
+
+    # Determine the start row for sampling
+    start_row = (
+        0
+        if cfg.data.last_included_row_number < 0
+        else (cfg.data.last_included_row_number + 1) % total_rows
+    )
+
+    # If the start_row + sample_size exceeds total_rows, adjust the sample size
+    if start_row + sample_size > total_rows:
+        sample_size = total_rows - start_row
+
+    # Load only the necessary rows into memory
+    skiprows = range(1, start_row + 1)  # Skip rows before the start_row, keeping header
+    nrows = sample_size  # Number of rows to read
+    data = pd.read_csv(datastore_path, skiprows=skiprows, nrows=nrows)
+
+    print("Sampling data...")
+    resulted_sample = data
+
+    # Create a deep copy of cfg to modify without affecting the original
+    updated_cfg = copy.deepcopy(cfg)
+
+    # Update the configuration for last included row number in the copy
+    new_last_included_row_number = start_row + sample_size - 1
+    updated_cfg.data.last_included_row_number = (
+        new_last_included_row_number % total_rows
+    )
+
+    # Increment and update the data version in the copy
+    new_version = f"v{updated_cfg.data.version_number + 1}.0"
+    updated_cfg.data.data_version = new_version
+    updated_cfg.data.version_number = updated_cfg.data.version_number + 1
+
+    # Return both the sampled data and the updated configuration
+    return resulted_sample, updated_cfg
 
 
 def validate_initial_data(cfg: DictConfig, df: pd.DataFrame):
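
What this commit changes in practice: sample_data no longer swallows failures and returns (None, cfg); a failed download or read now propagates to the caller. A minimal caller-side sketch of handling that, assuming an OmegaConf config file (the filename config.yaml is hypothetical; only the cfg.data.* fields used above are required):

# Hypothetical caller, not part of this commit. With the try-except
# removed, error handling (if desired) moves to the call site.
from omegaconf import OmegaConf

from src.data import sample_data

cfg = OmegaConf.load("config.yaml")  # assumed config; must define cfg.data.*

try:
    sample, updated_cfg = sample_data(cfg)
except Exception as e:
    # Roughly what the deleted except-branch used to do, but now at the
    # call site and re-raising instead of hiding the error.
    print("Error in loading or sampling the data: ", e)
    raise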

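For intuition on the wraparound arithmetic in the new function body, here is a short worked example with invented numbers:

# Worked example of the sampling-window math above (numbers invented).
import math

total_rows = 100     # rows in the CSV, excluding the header
fraction = 0.3       # stands in for cfg.data.sample_size
last_included = 89   # stands in for cfg.data.last_included_row_number

sample_size = math.ceil(total_rows * fraction)  # 30
start_row = 0 if last_included < 0 else (last_included + 1) % total_rows  # 90
if start_row + sample_size > total_rows:
    sample_size = total_rows - start_row  # clipped to 10
new_last = (start_row + sample_size - 1) % total_rows  # 99
# The next call starts at (99 + 1) % 100 = 0, i.e. the window wraps around.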