
docs: update docs and some text in code;
WenjieDu committed Dec 21, 2023
1 parent 022ee07 commit 5dc5ba3
Showing 3 changed files with 18 additions and 16 deletions.
README.md — 15 changes: 8 additions & 7 deletions
@@ -150,10 +150,10 @@ We present you a usage example of imputing missing values in time series with Py
 ``` python
 import numpy as np
 from sklearn.preprocessing import StandardScaler
-from pygrinder import mcar, masked_fill
+from pygrinder import mcar
 from pypots.data import load_specific_dataset
 from pypots.imputation import SAITS
-from pypots.utils.metrics import cal_mae
+from pypots.utils.metrics import calc_mae
 # Data preprocessing. Tedious, but PyPOTS can help.
 data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.
@@ -162,17 +162,18 @@ num_samples = len(X['RecordID'].unique())
 X = X.drop(['RecordID', 'Time'], axis = 1)
 X = StandardScaler().fit_transform(X.to_numpy())
 X = X.reshape(num_samples, 48, -1)
-X_intact, X, missing_mask, indicating_mask = mcar(X, 0.1)  # hold out 10% observed values as ground truth
-X = masked_fill(X, 1 - missing_mask, np.nan)
-dataset = {"X": X}
-print(dataset["X"].shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
+X_ori = X  # keep X_ori for validation
+X = mcar(X, 0.1)  # randomly hold out 10% observed values as ground truth
+dataset = {"X": X}  # X for model input
+print(X.shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
 # Model training. This is PyPOTS showtime.
 saits = SAITS(n_steps=48, n_features=37, n_layers=2, d_model=256, d_inner=128, n_heads=4, d_k=64, d_v=64, dropout=0.1, epochs=10)
 # Here we use the whole dataset as the training set because the ground truth is not visible to the model; you can also split it into train/val/test sets.
 saits.fit(dataset)
 imputation = saits.impute(dataset)  # impute the originally-missing values and artificially-missing values
-mae = cal_mae(imputation, X_intact, indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)
+indicating_mask = np.isnan(X) ^ np.isnan(X_ori)  # indicating mask for imputation error calculation
+mae = calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)
 ```
 </details>
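A note on the API change above: `pygrinder.mcar` now returns the corrupted array directly instead of the old `(X_intact, X, missing_mask, indicating_mask)` tuple, so the indicating mask is rebuilt afterwards with XOR. A minimal sketch (not part of this commit; the toy array is illustrative) of why `np.isnan(X) ^ np.isnan(X_ori)` flags exactly the artificially-masked positions:

```python
import numpy as np

# Toy series: index 1 is originally missing.
X_ori = np.array([1.0, np.nan, 3.0, 4.0])

# Artificially mask one observed value (index 3), as mcar does at random.
X = X_ori.copy()
X[3] = np.nan

# XOR is True only where exactly one array is NaN: the artificially-masked
# slots. Originally-missing slots are NaN in both, so they XOR to False.
indicating_mask = np.isnan(X) ^ np.isnan(X_ori)
print(indicating_mask)  # [False False False  True]
```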

docs/examples.rst — 15 changes: 8 additions & 7 deletions
@@ -22,10 +22,10 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 import numpy as np
 from sklearn.preprocessing import StandardScaler
-from pygrinder import mcar, masked_fill
+from pygrinder import mcar
 from pypots.data import load_specific_dataset
 from pypots.imputation import SAITS
-from pypots.utils.metrics import cal_mae
+from pypots.utils.metrics import calc_mae
 # Data preprocessing. Tedious, but PyPOTS can help. 🤓
 data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.
@@ -34,10 +34,10 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 X = X.drop(['RecordID', 'Time'], axis = 1)
 X = StandardScaler().fit_transform(X.to_numpy())
 X = X.reshape(num_samples, 48, -1)
-X_intact, X, missing_mask, indicating_mask = mcar(X, 0.1)  # hold out 10% observed values as ground truth
-X = masked_fill(X, 1 - missing_mask, np.nan)
-dataset = {"X": X}
-print(dataset["X"].shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
+X_ori = X  # keep X_ori for validation
+X = mcar(X, 0.1)  # randomly hold out 10% observed values as ground truth
+dataset = {"X": X}  # X for model input
+print(X.shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
 # initialize the model
 saits = SAITS(
@@ -60,7 +60,8 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 # impute the originally-missing values and artificially-missing values
 imputation = saits.impute(dataset)
 # calculate mean absolute error on the ground truth (artificially-missing values)
-mae = cal_mae(imputation, X_intact, indicating_mask)
+indicating_mask = np.isnan(X) ^ np.isnan(X_ori)  # indicating mask for imputation error calculation
+mae = calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)
 # the best model has already been saved, but you can still manually save it with the save_model() function as below
 saits.save_model(saving_dir="examples/saits", file_name="manually_saved_saits_model")
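For the error computation above, `calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)` presumably averages absolute errors only over the masked positions. A minimal stand-in under that assumption (`masked_mae` below is hypothetical, not the PyPOTS implementation):

```python
import numpy as np

def masked_mae(predictions: np.ndarray, targets: np.ndarray, mask: np.ndarray) -> float:
    """Mean absolute error restricted to positions where mask is True."""
    mask = mask.astype(float)
    return float(np.sum(np.abs(predictions - targets) * mask) / (np.sum(mask) + 1e-12))

pred = np.array([1.0, 2.0, 3.0])
target = np.array([1.5, 2.0, 0.0])    # 0.0 stands in for a NaN zero-filled by np.nan_to_num
mask = np.array([True, True, False])  # only held-out ground-truth positions count
print(masked_mae(pred, target, mask))  # 0.25
```

This also shows why `np.nan_to_num(X_ori)` is safe: zero-filled NaNs sit outside the mask and never contribute to the error.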
pypots/base.py — 4 changes: 2 additions & 2 deletions
@@ -287,7 +287,7 @@ def save(
                 torch.save(self.model.module, saving_path)
             else:
                 torch.save(self.model, saving_path)
-            logger.info(f"Saved the model to {saving_path}.")
+            logger.info(f"Saved the model to {saving_path}")
         except Exception as e:
             raise RuntimeError(
                 f'Failed to save the model to "{saving_path}" because of the below error! \n{e}'
@@ -543,7 +543,7 @@ def _print_model_size(self) -> None:
         """Print the number of trainable parameters in the initialized NN model."""
         num_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
         logger.info(
-            f"A {self.__class__.__name__} model initialized with the given hyperparameters, "
+            f"{self.__class__.__name__} model initialized with the given hyperparameters, "
             f"the number of trainable parameters: {num_params:,}"
         )

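The `_print_model_size` hunk relies on a standard PyTorch idiom for counting trainable parameters. A self-contained sketch on a toy model (an assumption for illustration, not PyPOTS code):

```python
import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 4), nn.ReLU(), nn.Linear(4, 1))
model[0].weight.requires_grad = False  # freeze one tensor to show it is excluded

# numel() counts elements per tensor; only tensors with requires_grad are summed.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {num_params:,}")  # 4 + 4 + 1 = 9 (the 40 frozen weights are excluded)
```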

