Merge pull request #297 from apax-hub/dev
v0.5.0
M-R-Schaefer authored Jul 24, 2024
2 parents 4793205 + fa17ee2 commit b268f30
Showing 59 changed files with 3,165 additions and 1,894 deletions.
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

26 changes: 8 additions & 18 deletions .pre-commit-config.yaml
@@ -10,22 +10,12 @@ repos:
- id: end-of-file-fixer
- id: trailing-whitespace

- repo: https://github.com/psf/black
rev: 24.4.0
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.2
hooks:
- id: black
exclude: ^apax/utils/jax_md_reduced/

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black", "--filter-files"]
exclude: ^apax/utils/jax_md_reduced/

- repo: https://github.com/pycqa/flake8
rev: 7.0.0
hooks:
- id: flake8
additional_dependencies: [ flake8-isort ]
exclude: ^apax/utils/jax_md_reduced/
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
3 changes: 3 additions & 0 deletions apax/__init__.py
@@ -1,4 +1,5 @@
import os
import warnings

import jax

@@ -8,3 +9,5 @@
from apax.utils.helpers import setup_ase

setup_ase()

warnings.filterwarnings("ignore", message=".*os.fork()*")
2 changes: 1 addition & 1 deletion apax/bal/api.py
@@ -140,7 +140,7 @@ def kernel_selection(
n_train = len(train_atoms)
dataset = OTFInMemoryDataset(
train_atoms + pool_atoms,
cutoff=config.model.r_max,
cutoff=config.model.basis.r_max,
bs=processing_batch_size,
n_epochs=1,
ignore_labels=True,
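The one-line change above follows the new nested model configuration in this release: the radial basis settings, including the cutoff, now live under model.basis rather than directly on the model section (see the updated train_config_full.yaml template below). A minimal pydantic sketch of the nesting, using hypothetical class names rather than the actual apax config classes:

```python
from pydantic import BaseModel


# Hypothetical classes for illustration only; the real apax config models
# are not part of this diff.
class BasisConfig(BaseModel):
    name: str = "gaussian"
    n_basis: int = 7
    r_max: float = 6.0  # cutoff radius, formerly config.model.r_max
    r_min: float = 0.5


class ModelConfig(BaseModel):
    basis: BasisConfig = BasisConfig()


cutoff = ModelConfig().basis.r_max  # accessed as config.model.basis.r_max in apax
```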
79 changes: 70 additions & 9 deletions apax/bal/feature_maps.py
@@ -1,6 +1,7 @@
from typing import Callable, Literal, Tuple, Union

import jax
import jax.ad_checkpoint
import jax.numpy as jnp
from flax.core.frozen_dict import FrozenDict
from flax.traverse_util import flatten_dict, unflatten_dict
@@ -39,7 +40,7 @@ class LastLayerGradientFeatures(FeatureTransformation, extra="forbid"):
https://arxiv.org/pdf/2203.09410
"""

name: Literal["ll_grad"]
name: Literal["ll_grad"] = "ll_grad"
layer_name: str = "dense_2"

def apply(self, model: EnergyModel) -> FeatureMap:
@@ -58,19 +59,79 @@ def inner(ll_params):
inputs["box"],
inputs["offsets"],
)
return model.apply(full_params, R, Z, idx, box, offsets)
out = model.apply(full_params, R, Z, idx, box, offsets)
# take mean in case of shallow ensemble
# no effect for single model
out = jnp.mean(out)
return out

g_ll = jax.grad(inner)(ll_params)
g_ll = unflatten_dict(g_ll)

g_ll = jax.tree_map(lambda arr: jnp.mean(arr, axis=-1, keepdims=True), g_ll)
g_flat = jax.tree_map(lambda arr: jnp.reshape(arr, (-1,)), g_ll)
(gw, gb), _ = jax.tree_util.tree_flatten(g_flat)
(gb, gw), _ = jax.tree_util.tree_flatten(g_flat)

g = [gw, gb]
g = jnp.concatenate(g)

return g

return ll_grad


class LastLayerForceFeatures(FeatureTransformation, extra="forbid"):
"""
Model transformation which computes the gradient of the output
w.r.t. the specified layer.
"""

name: Literal["ll_force_feat"] = "ll_force_feat"
layer_name: str = "dense_2"
return_raw: bool = True

def apply(self, model: EnergyModel) -> FeatureMap:
def ll_grad(params, inputs):
ll_params, remaining_params = extract_feature_params(params, self.layer_name)

bias_factor = 0.1
weight_factor = jnp.sqrt(1 / gw.shape[-1])
g_scaled = [weight_factor * gw, bias_factor * gb]
energy_fn = lambda *inputs: jnp.mean(model.apply(*inputs))
force_fn = jax.grad(energy_fn, 1)

g = jnp.concatenate(g_scaled)
def inner(ll_params):
ll_params.update(remaining_params)
full_params = unflatten_dict(ll_params)

R, Z, idx, box, offsets = (
inputs["positions"],
inputs["numbers"],
inputs["idx"],
inputs["box"],
inputs["offsets"],
)
out = force_fn(full_params, R, Z, idx, box, offsets)
return out

ll_params = jax.tree_map(
lambda arr: jnp.mean(arr, axis=-1, keepdims=True), ll_params
)
g_ll = jax.jacfwd(inner)(ll_params)
g_ll = unflatten_dict(g_ll)

# shapes:
# b: n_atoms, 3, 1
# w: n_atoms, 3, n_features, 1

if self.return_raw:
(gb, gw), _ = jax.tree_util.tree_flatten(g_ll)

# g: n_atoms, 3, n_features
g = gw[:, :, :, 0]
else:
g_flat = jax.tree_map(
lambda arr: jnp.reshape(jnp.sum(jnp.sum(arr, 0), 0), (-1,)), g_ll
)
(gb, gw), _ = jax.tree_util.tree_flatten(g_flat)
g = [gw, gb]
g = jnp.concatenate(g)

return g

@@ -87,5 +148,5 @@ def apply(self, model: EnergyModel) -> FeatureMap:


FeatureMapOptions = TypeAdapter(
Union[LastLayerGradientFeatures, IdentityFeatures]
Union[LastLayerGradientFeatures, LastLayerForceFeatures, IdentityFeatures]
).validate_python
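Both gradient-based transformations are selected through the FeatureMapOptions validator above. A minimal usage sketch, assuming only the import path of this file (apax/bal/feature_maps.py) and a model/params pair supplied by the surrounding BAL pipeline:

```python
from apax.bal.feature_maps import FeatureMapOptions

# Validate a plain config dict into the new force-feature transformation.
feat_cfg = FeatureMapOptions({"name": "ll_force_feat", "layer_name": "dense_2"})

# feature_map = feat_cfg.apply(model)  # -> callable(params, inputs)
# g = feature_map(params, inputs)      # (n_atoms, 3, n_features) when return_raw=True,
#                                      # per the shape comments in the diff
```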
2 changes: 1 addition & 1 deletion apax/cli/apax_app.py
@@ -177,7 +177,7 @@ def visualize_model(
"Training configuration file to be visualized. A CO molecule is taken as"
" sample input."
),
)
),
):
"""
Visualize a model based on a configuration file.
39 changes: 25 additions & 14 deletions apax/cli/templates/train_config_full.yaml
@@ -1,9 +1,9 @@
n_epochs: <NUMBER OF EPOCHS>
seed: 1
patience: null
n_models: 1
n_jitted_steps: 1
data_parallel: True
weight_average: null

data:
directory: models/
@@ -15,8 +15,11 @@ data:
#train_data_path: <PATH>
#val_data_path: <PATH>
#test_data_path: <PATH>
dataset:
processing: cached
shuffle_buffer_size: 1000

additional_properties_info: {}
ds_type: cached

n_train: 1000
n_valid: 100
@@ -30,20 +33,27 @@ data:
scale_method: "per_element_force_rms_scale"
scale_options: {}

shuffle_buffer_size: 1000

pos_unit: Ang
energy_unit: eV

model:
n_basis: 7
basis:
name: gaussian
n_basis: 7
r_max: 6.0
r_min: 0.5

ensemble: null
# if you would like to train model ensembles, this can be achieved with
# the following example.
# ensemble:
# kind: full
# n_members: N

n_radial: 5
n_contr: -1
n_contr: 8
nn: [512, 512]

r_max: 6.0
r_min: 0.5

calc_stress: true
use_zbl: false

@@ -73,16 +83,17 @@ metrics:
- mse

optimizer:
opt_name: adam
opt_kwargs: {}
name: adam
kwargs: {}
emb_lr: 0.03
nn_lr: 0.03
scale_lr: 0.001
shift_lr: 0.05
zbl_lr: 0.001
transition_begin: 0
sam_rho: 0.0

schedule:
name: linear
transition_begin: 0
end_value: 1e-6
callbacks:
- name: csv

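For readers migrating existing configs: the optimizer keys are renamed (opt_name -> name, opt_kwargs -> kwargs) and the learning-rate decay settings move into a dedicated schedule block, matching the new schedule classes in the next file. A small before/after sketch as Python dicts; the YAML template above is the authoritative form:

```python
# Optimizer section before this release (keys taken from the removed template lines)
old_optimizer = {"opt_name": "adam", "opt_kwargs": {}, "transition_begin": 0}

# Equivalent section in v0.5.0
new_optimizer = {
    "name": "adam",
    "kwargs": {},
    "schedule": {"name": "linear", "transition_begin": 0, "end_value": 1e-6},
}
```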
45 changes: 45 additions & 0 deletions apax/config/lr_config.py
@@ -0,0 +1,45 @@
from typing import Literal

from pydantic import BaseModel, NonNegativeFloat


class LRSchedule(BaseModel, frozen=True, extra="forbid"):
name: str


class LinearLR(LRSchedule, frozen=True, extra="forbid"):
"""
Configuration of a linear learning rate schedule.
Learning rates of 0 will freeze the respective parameters.
Parameters
----------
transition_begin : int, default = 0
Number of steps after which to start decreasing the learning rate.
end_value : NonNegativeFloat, default = 1e-6
Final LR at the end of training.
"""

name: Literal["linear"]
transition_begin: int = 0
end_value: NonNegativeFloat = 1e-6


class CyclicCosineLR(LRSchedule, frozen=True, extra="forbid"):
"""
Configuration of a cyclic cosine learning rate schedule.
Learning rates of 0 will freeze the respective parameters.
Parameters
----------
period: int = 20
Length of a cycle in epochs.
decay_factor: NonNegativeFloat = 1.0
Factor by which to decrease the LR after each cycle.
1.0 means no decrease.
"""

name: Literal["cyclic_cosine"]
period: int = 20
decay_factor: NonNegativeFloat = 1.0
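These schedule configs feed the new schedule block of the optimizer section shown in the template above. A hedged sketch of how a LinearLR could map onto an optax schedule; the actual wiring inside apax's optimizer builder is not part of this diff, so the optax.linear_schedule call and the step counts are assumptions:

```python
import optax

from apax.config.lr_config import LinearLR

lin = LinearLR(name="linear", transition_begin=0, end_value=1e-6)

# Assumed mapping: decay a learning rate (e.g. nn_lr: 0.03 from the template)
# linearly to end_value over the course of training.
schedule = optax.linear_schedule(
    init_value=0.03,
    end_value=lin.end_value,
    transition_steps=100_000,  # total number of optimizer steps, assumed
    transition_begin=lin.transition_begin,
)
```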
4 changes: 4 additions & 0 deletions apax/config/md_config.py
@@ -50,9 +50,13 @@ class NVEOptions(Integrator, extra="forbid"):
----------
name : Literal["nve"]
Name of the ensemble.
init_temperature : PositiveFloat, default = 298.15
Initialisation temperature in Kelvin (K).
"""

name: Literal["nve"]
init_temperature: PositiveFloat = 298.15 # K


class NVTOptions(Integrator, extra="forbid"):
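The new init_temperature field gives the NVE integrator an explicit velocity-initialisation temperature. A minimal sketch, assuming the pydantic class shown above and that any Integrator fields not visible in this hunk keep their defaults:

```python
from apax.config.md_config import NVEOptions

# Initialise velocities at 300 K instead of the 298.15 K default.
nve = NVEOptions(name="nve", init_temperature=300.0)
```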