simple comments on model/moirai in source code (#69)
Co-authored-by: Chenghao Liu <74166079+chenghaoliu89@users.noreply.github.com>
HALF111 and chenghaoliu89 authored Jun 12, 2024
1 parent adf7206 commit 048ff5d
Showing 2 changed files with 85 additions and 1 deletion.
36 changes: 35 additions & 1 deletion src/uni2ts/model/moirai/module.py
@@ -61,7 +61,9 @@ class MoiraiModule(
PyTorchModelHubMixin,
coders={DistributionOutput: (encode_distr_output, decode_distr_output)},
):
"""Contains components of Moirai to ensure implementation is identical across models"""
"""
Contains components of Moirai, to ensure implementation is identical across models.
"""

def __init__(
self,
@@ -74,6 +76,17 @@ def __init__(
dropout_p: float,
scaling: bool = True,
):
"""Initialization of MoiraiModule.
Args:
distr_output (DistributionOutput): Mixture of distribution for output.
d_model (int): Model dimension.
num_layers (int): Layer numbers.
patch_sizes (tuple[int, ...]): atch sizes for input and output.
attn_dropout_p (float): Dropout rate for attention.
dropout_p (float): Dropout rate.
scaling (bool, optional): whether to use scaler on inputs. Defaults to True.
"""
super().__init__()
self.d_model = d_model
self.num_layers = num_layers
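As a quick orientation for these arguments, here is a hedged construction sketch. It assumes the constructor takes only the parameters documented above (the collapsed diff may hide additional ones), and the argument values are illustrative, not recommended defaults.

# Hedged sketch: constructing MoiraiModule from the documented arguments only.
# The collapsed diff may hide further parameters; values are illustrative.
module = MoiraiModule(
    distr_output=my_distr_output,  # any DistributionOutput, e.g. a mixture (assumed variable)
    d_model=384,
    num_layers=6,
    patch_sizes=(8, 16, 32, 64, 128),
    attn_dropout_p=0.0,
    dropout_p=0.0,
    scaling=True,
)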
@@ -122,6 +135,27 @@ def forward(
prediction_mask: Bool[torch.Tensor, "*batch seq_len"],
patch_size: Int[torch.Tensor, "*batch seq_len"],
) -> Distribution:
"""forward process for MoiraiModule.
Including 6 steps:
1. Employ scaler on target data and mask and ids.
2. Go through input linear layer with multiple patch sizes.
3. Mask the prediction window.
4. Go through the masked encoders.
5. Go through output layer with multiple patch sizes.
6. Make a transformation of distribution, and return it.
Args:
target (Float[torch.Tensor]): Input data.
observed_mask (Bool[torch.Tensor]): Mask on NaN numbers.
sample_id (Int[torch.Tensor]): Sample ids.
time_id (Int[torch.Tensor]): Time ids.
variate_id (Int[torch.Tensor]): Variate ids.
prediction_mask (Bool[torch.Tensor]): Mask on prediction window.
patch_size (Int[torch.Tensor]): Patch sizes for input and output layer.
Returns:
Distribution: returns a distribution
"""
loc, scale = self.scaler(
target,
observed_mask * ~prediction_mask.unsqueeze(-1),
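As referenced in step 1 above, the scaler derives location/scale statistics only from values that are observed and lie outside the prediction window, mirroring the mask product `observed_mask * ~prediction_mask.unsqueeze(-1)` visible in the diff. The following is a minimal sketch, not the actual uni2ts scaler: the real one also groups statistics by sample and variate ids, which this simplification omits.

# Minimal sketch of step 1 (NOT the actual uni2ts scaler, which also groups
# statistics by sample_id / variate_id): compute per-sequence loc/scale from
# positions that are observed AND outside the prediction window.
import torch

def masked_loc_scale(target, observed_mask, prediction_mask, eps=1e-5):
    # target, observed_mask: (*batch, seq_len, patch); prediction_mask: (*batch, seq_len)
    weight = observed_mask * ~prediction_mask.unsqueeze(-1)  # usable positions
    denom = weight.sum(dim=(-2, -1), keepdim=True).clamp(min=1)
    loc = (target * weight).sum(dim=(-2, -1), keepdim=True) / denom
    var = (((target - loc) * weight) ** 2).sum(dim=(-2, -1), keepdim=True) / denom
    scale = (var + eps).sqrt()
    return loc, scale  # used as: (target - loc) / scale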
50 changes: 50 additions & 0 deletions src/uni2ts/model/moirai/pretrain.py
@@ -119,6 +119,20 @@ def forward(
prediction_mask: Bool[torch.Tensor, "*batch seq_len"],
patch_size: Int[torch.Tensor, "*batch seq_len"],
) -> Distribution:
"""forward process for MoiraiPretrain, with MoiraiModule.
Args:
target (Float[torch.Tensor]): Input data.
observed_mask (Bool[torch.Tensor]): Mask on NaN numbers.
sample_id (Int[torch.Tensor]): Sample ids.
time_id (Int[torch.Tensor]): Time ids.
variate_id (Int[torch.Tensor]): Variate ids.
prediction_mask (Bool[torch.Tensor]): Mask on prediction window.
patch_size (Int[torch.Tensor]): Patch sizes for input and output layer.
Returns:
Distribution: returns a distribution
"""
distr = self.module(
target=target,
observed_mask=observed_mask,
@@ -133,6 +147,15 @@ def forward(
def training_step(
self, batch: dict[str, torch.Tensor], batch_idx: int
) -> torch.Tensor:
"""One step of training on current batch, and get the training loss.
Args:
batch (dict[str, torch.Tensor]): Input batch data after multiple transformation.
batch_idx (int): Batch ids.
Returns:
torch.Tensor: Returns the loss on current batch.
"""
distr = self(
**{field: batch[field] for field in list(self.seq_fields) + ["sample_id"]}
)
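The fragment above builds the predictive distribution for the batch before the diff jumps ahead. As a hedged sketch only — the actual uni2ts loss is configurable and packs sequences, which this ignores — such a step typically concludes by scoring the distribution with negative log-likelihood over the prediction window:

# Hedged sketch of how such a training_step can conclude (not the exact
# uni2ts implementation): negative log-likelihood, averaged over the
# positions in the prediction window only.
nll = -distr.log_prob(batch["target"])                # elementwise NLL
mask = batch["prediction_mask"].unsqueeze(-1).expand_as(nll)
loss = (nll * mask).sum() / mask.sum().clamp(min=1)
self.log("train_loss", loss)
return loss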
@@ -168,6 +191,11 @@ def training_step(
def validation_step(
self, batch: dict[str, torch.Tensor], batch_idx: int, dataloader_idx: int = 0
) -> torch.Tensor:
"""Returns optimizer onfiguration, including optimizer and learning-rate scheduler.
Returns:
dict: Return the configuration.
"""
distr = self(
**{field: batch[field] for field in list(self.seq_fields) + ["sample_id"]}
)
@@ -324,6 +352,28 @@ def configure_optimizers(self) -> dict:

@property
def train_transform_map(self) -> dict[str, Callable[..., Transformation]]:
"""Default transformations on input data.
SampleDimension: Sample the number of variants for the current task
GetPatchSize: Get the patch size.
PatchCrop: Crop the patches.
PackFields: Pack each feature columns, including 'target' and 'past_feat_dynamic_real'.
AddObservedMask: Mask NaN numbers
ImputeTimeSeries: Fill NaN with the specified imputation method, which defaults to 0.
Patchify: Cut data into patches.
AddVariateIndex: Add variate_id
AddTimeIndex: Add time_id
MaskedPrediction: Specify the task,
i.e., sample the total input length, as well as sample the proportion of look-back window and prediction window length.
ExtendMask: Add an auxiliary mask.
FlatPackCollection: Pack/Merge along 'variate_id, time_id, prediction_mask, observed_mask, and target' dimensions.
FlatPackFields: Pack/Merge 'target'.
SequencifyField: sequencify the 'patch_size' field.
SelectFields: Output the data of predefined fields!
Returns:
dict[str, Callable[..., Transformation]]: Returns the list of sequential transformations.
"""

def default_train_transform():
return (
SampleDimension(
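As referenced in the train_transform_map docstring, the transformations above run sequentially, each consuming and returning a data-entry dict. Here is a minimal, hedged sketch of that pattern: the names add_observed_mask and select_fields are hypothetical stand-ins for AddObservedMask and SelectFields, not the uni2ts classes, and the real pipeline composes Transformation objects rather than bare functions.

# Hedged sketch of sequential transformation chaining (not the uni2ts API).
# `add_observed_mask` and `select_fields` are hypothetical stand-ins.
from typing import Any, Callable

import numpy as np

Transform = Callable[[dict[str, Any]], dict[str, Any]]

def chain(*transforms: Transform) -> Transform:
    def apply(entry: dict[str, Any]) -> dict[str, Any]:
        for t in transforms:  # apply each transformation in order
            entry = t(entry)
        return entry
    return apply

def add_observed_mask(entry: dict[str, Any]) -> dict[str, Any]:
    entry["observed_mask"] = ~np.isnan(entry["target"])  # mask NaN values
    return entry

def select_fields(*fields: str) -> Transform:
    return lambda entry: {f: entry[f] for f in fields}

pipeline = chain(add_observed_mask, select_fields("target", "observed_mask"))
out = pipeline({"target": np.array([1.0, np.nan, 3.0])})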
