
docs: update docs and some text in code;
WenjieDu committed Dec 21, 2023
1 parent 022ee07 commit 5dc5ba3
Showing 3 changed files with 18 additions and 16 deletions.
README.md — 15 changes: 8 additions & 7 deletions
@@ -150,10 +150,10 @@ We present you a usage example of imputing missing values in time series with Py
 ``` python
 import numpy as np
 from sklearn.preprocessing import StandardScaler
-from pygrinder import mcar, masked_fill
+from pygrinder import mcar
 from pypots.data import load_specific_dataset
 from pypots.imputation import SAITS
-from pypots.utils.metrics import cal_mae
+from pypots.utils.metrics import calc_mae
 # Data preprocessing. Tedious, but PyPOTS can help.
 data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.
@@ -162,17 +162,18 @@ num_samples = len(X['RecordID'].unique())
 X = X.drop(['RecordID', 'Time'], axis = 1)
 X = StandardScaler().fit_transform(X.to_numpy())
 X = X.reshape(num_samples, 48, -1)
-X_intact, X, missing_mask, indicating_mask = mcar(X, 0.1)  # hold out 10% observed values as ground truth
-X = masked_fill(X, 1 - missing_mask, np.nan)
-dataset = {"X": X}
-print(dataset["X"].shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
+X_ori = X  # keep X_ori for validation
+X = mcar(X, 0.1)  # randomly hold out 10% observed values as ground truth
+dataset = {"X": X}  # X for model input
+print(X.shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
 # Model training. This is PyPOTS showtime.
 saits = SAITS(n_steps=48, n_features=37, n_layers=2, d_model=256, d_inner=128, n_heads=4, d_k=64, d_v=64, dropout=0.1, epochs=10)
 # Here we use the whole dataset as the training set because the ground truth is not visible to the model; you can also split it into train/val/test sets.
 saits.fit(dataset)
 imputation = saits.impute(dataset)  # impute the originally-missing values and artificially-missing values
-mae = cal_mae(imputation, X_intact, indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)
+indicating_mask = np.isnan(X) ^ np.isnan(X_ori)  # indicating mask for imputation error calculation
+mae = calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)
 ```
 </details>
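A note on the API change above: `pygrinder.mcar` now returns the corrupted array directly instead of the old `(X_intact, X, missing_mask, indicating_mask)` tuple, so the indicating mask is rebuilt afterwards with XOR. A minimal sketch (not part of this commit; the toy array is illustrative) of why `np.isnan(X) ^ np.isnan(X_ori)` flags exactly the artificially-masked positions:

```python
import numpy as np

# Toy series: index 1 is originally missing.
X_ori = np.array([1.0, np.nan, 3.0, 4.0])

# Artificially mask one observed value (index 3), as mcar does at random.
X = X_ori.copy()
X[3] = np.nan

# XOR is True only where exactly one array is NaN: the artificially-masked
# slots. Originally-missing slots are NaN in both, so they XOR to False.
indicating_mask = np.isnan(X) ^ np.isnan(X_ori)
print(indicating_mask)  # [False False False  True]
```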

docs/examples.rst — 15 changes: 8 additions & 7 deletions
@@ -22,10 +22,10 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 import numpy as np
 from sklearn.preprocessing import StandardScaler
-from pygrinder import mcar, masked_fill
+from pygrinder import mcar
 from pypots.data import load_specific_dataset
 from pypots.imputation import SAITS
-from pypots.utils.metrics import cal_mae
+from pypots.utils.metrics import calc_mae
 # Data preprocessing. Tedious, but PyPOTS can help. 🤓
 data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.
@@ -34,10 +34,10 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 X = X.drop(['RecordID', 'Time'], axis = 1)
 X = StandardScaler().fit_transform(X.to_numpy())
 X = X.reshape(num_samples, 48, -1)
-X_intact, X, missing_mask, indicating_mask = mcar(X, 0.1)  # hold out 10% observed values as ground truth
-X = masked_fill(X, 1 - missing_mask, np.nan)
-dataset = {"X": X}
-print(dataset["X"].shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
+X_ori = X  # keep X_ori for validation
+X = mcar(X, 0.1)  # randomly hold out 10% observed values as ground truth
+dataset = {"X": X}  # X for model input
+print(X.shape)  # (11988, 48, 37), 11988 samples, 48 time steps, 37 features
 # initialize the model
 saits = SAITS(
@@ -60,7 +60,8 @@ You can also find a simple and quick-start tutorial notebook on Google Colab wit
 # impute the originally-missing values and artificially-missing values
 imputation = saits.impute(dataset)
 # calculate mean absolute error on the ground truth (artificially-missing values)
-mae = cal_mae(imputation, X_intact, indicating_mask)
+indicating_mask = np.isnan(X) ^ np.isnan(X_ori)  # indicating mask for imputation error calculation
+mae = calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)
 # the best model has already been saved, but you can still manually save it with the save_model() function as below
 saits.save_model(saving_dir="examples/saits", file_name="manually_saved_saits_model")
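For the error computation above, `calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)` presumably averages absolute errors only over the masked positions. A minimal stand-in under that assumption (`masked_mae` below is hypothetical, not the PyPOTS implementation):

```python
import numpy as np

def masked_mae(predictions: np.ndarray, targets: np.ndarray, mask: np.ndarray) -> float:
    """Mean absolute error restricted to positions where mask is True."""
    mask = mask.astype(float)
    return float(np.sum(np.abs(predictions - targets) * mask) / (np.sum(mask) + 1e-12))

pred = np.array([1.0, 2.0, 3.0])
target = np.array([1.5, 2.0, 0.0])    # 0.0 stands in for a NaN zero-filled by np.nan_to_num
mask = np.array([True, True, False])  # only held-out ground-truth positions count
print(masked_mae(pred, target, mask))  # 0.25
```

This also shows why `np.nan_to_num(X_ori)` is safe: zero-filled NaNs sit outside the mask and never contribute to the error.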
pypots/base.py — 4 changes: 2 additions & 2 deletions
@@ -287,7 +287,7 @@ def save(
                 torch.save(self.model.module, saving_path)
             else:
                 torch.save(self.model, saving_path)
-            logger.info(f"Saved the model to {saving_path}.")
+            logger.info(f"Saved the model to {saving_path}")
         except Exception as e:
             raise RuntimeError(
                 f'Failed to save the model to "{saving_path}" because of the below error! \n{e}'
@@ -543,7 +543,7 @@ def _print_model_size(self) -> None:
         """Print the number of trainable parameters in the initialized NN model."""
         num_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
         logger.info(
-            f"A {self.__class__.__name__} model initialized with the given hyperparameters, "
+            f"{self.__class__.__name__} model initialized with the given hyperparameters, "
             f"the number of trainable parameters: {num_params:,}"
         )

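The `_print_model_size` hunk relies on a standard PyTorch idiom for counting trainable parameters. A self-contained sketch on a toy model (an assumption for illustration, not PyPOTS code):

```python
import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 4), nn.ReLU(), nn.Linear(4, 1))
model[0].weight.requires_grad = False  # freeze one tensor to show it is excluded

# numel() counts elements per tensor; only tensors with requires_grad are summed.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {num_params:,}")  # 4 + 4 + 1 = 9 (the 40 frozen weights are excluded)
```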

