diff --git a/common/batch.py b/common/batch.py
index cfd0ac0..9b05c65 100644
--- a/common/batch.py
+++ b/common/batch.py
@@ -53,7 +53,7 @@ def batch_size(self) -> int:
 class DataclassBatch(BatchBase):
   @classmethod
   def feature_names(cls):
-    return list(cls.__dataclass_fields__.keys())
+    return [*cls.__dataclass_fields__]
 
   def as_dict(self):
     return {
diff --git a/common/checkpointing/snapshot.py b/common/checkpointing/snapshot.py
index 2703efd..f3fcfba 100644
--- a/common/checkpointing/snapshot.py
+++ b/common/checkpointing/snapshot.py
@@ -109,7 +109,7 @@ def load_snapshot_to_weight(
   """
   start_time = time.time()
   manifest = embedding_snapshot.get_manifest()
-  for path in manifest.keys():
+  for path in manifest:
     if path.startswith("0") and snapshot_emb_name in path:
       snapshot_path_to_load = path
   embedding_snapshot.read_object(snapshot_path_to_load, weight_tensor)
diff --git a/common/log_weights.py b/common/log_weights.py
index d2c8e21..dc8bacd 100644
--- a/common/log_weights.py
+++ b/common/log_weights.py
@@ -23,7 +23,7 @@ def weights_to_log(
   if not how_to_log:
     return
 
-  to_log = dict()
+  to_log = {}
   named_parameters = model.named_parameters()
   logging.info(f"Using DMP: {isinstance(model, DistributedModelParallel)}")
   if isinstance(model, DistributedModelParallel):
@@ -58,7 +58,7 @@ def log_ebc_norms(
       i.e. model.embeddings.ebc.embedding_bags.meta__user_id.weight
     sample_size: Limits number of rows per rank to compute average on to avoid OOM.
   """
-  norm_logs = dict()
+  norm_logs = {}
   for emb_key in ebc_keys:
     norms = (torch.ones(1, dtype=torch.float32) * -1).to(torch.device(f"cuda:{dist.get_rank()}"))
     if emb_key in model_state_dict:
diff --git a/optimizers/optimizer.py b/optimizers/optimizer.py
index 4517368..a0a53b9 100644
--- a/optimizers/optimizer.py
+++ b/optimizers/optimizer.py
@@ -61,7 +61,7 @@ def __init__(
   ):
     self.optimizer = optimizer
     self.lr_dict = lr_dict
-    self.group_names = list(self.lr_dict.keys())
+    self.group_names = [*self.lr_dict]
 
     num_param_groups = sum(1 for _, _optim in optimizer._optims for _ in _optim.param_groups)
     if num_param_groups != len(lr_dict):
diff --git a/projects/home/recap/data/dataset.py b/projects/home/recap/data/dataset.py
index 3478c68..1b8eb6f 100644
--- a/projects/home/recap/data/dataset.py
+++ b/projects/home/recap/data/dataset.py
@@ -250,7 +250,7 @@ def __init__(
     vocab_mapper: tf.keras.Model = None,
   ):
     logging.info("***** Labels *****")
-    logging.info(list(data_config.tasks.keys()))
+    logging.info([*data_config.tasks])
 
     self._data_config = data_config
     self._parse_fn = get_seg_dense_parse_fn(data_config)
@@ -295,7 +295,7 @@ def __init__(
         add_weights=should_add_weights,
       )
 
-    sparse_feature_names = list(vocab_mapper.vocabs.keys()) if vocab_mapper else None
+    sparse_feature_names = [*vocab_mapper.vocabs] if vocab_mapper else None
 
     self._tf_dataset = self._create_tf_dataset()
 
diff --git a/projects/home/recap/data/tfe_parsing.py b/projects/home/recap/data/tfe_parsing.py
index f597746..0686c52 100644
--- a/projects/home/recap/data/tfe_parsing.py
+++ b/projects/home/recap/data/tfe_parsing.py
@@ -25,7 +25,7 @@ def create_tf_example_schema(
     A dictionary schema suitable for deserializing tf.Example.
   """
   segdense_config = data_config.seg_dense_schema
-  labels = list(data_config.tasks.keys())
+  labels = [*data_config.tasks]
   used_features = (
     segdense_config.features + list(segdense_config.renamed_features.values()) + labels
   )
@@ -96,7 +96,7 @@ def parse_tf_example(
   # at TF level.
   # We should not return empty tensors if we dont use embeddings.
   # Otherwise, it breaks numpy->pt conversion
-  renamed_keys = list(seg_dense_schema_config.renamed_features.keys())
+  renamed_keys = [*seg_dense_schema_config.renamed_features]
   for renamed_key in renamed_keys:
     if "embedding" in renamed_key and (renamed_key not in inputs):
       inputs[renamed_key] = tf.zeros([], tf.float32)
diff --git a/projects/home/recap/data/util.py b/projects/home/recap/data/util.py
index a9fd51e..94af8ad 100644
--- a/projects/home/recap/data/util.py
+++ b/projects/home/recap/data/util.py
@@ -16,7 +16,7 @@ def keyed_tensor_from_tensors_dict(
   Returns:
 
   """
-  keys = list(tensor_map.keys())
+  keys = [*tensor_map]
   # We expect batch size to be first dim. However, if we get a shape [Batch_size],
   # KeyedTensor will not find the correct batch_size. So, in those cases we make sure the shape is
   # [Batch_size x 1].
@@ -84,7 +84,7 @@ def keyed_jagged_tensor_from_tensors_dict(
   lengths = torch.cat(lengths, axis=0)
 
   return torchrec.KeyedJaggedTensor(
-    keys=list(tensor_map.keys()),
+    keys=[*tensor_map],
     values=values,
     lengths=lengths,
   )
diff --git a/projects/home/recap/main.py b/projects/home/recap/main.py
index 3416164..2645434 100644
--- a/projects/home/recap/main.py
+++ b/projects/home/recap/main.py
@@ -47,7 +47,7 @@ def run(unused_argv: str, data_service_dispatcher: Optional[str] = None):
 
   loss_fn = losses.build_multi_task_loss(
     loss_type=LossType.BCE_WITH_LOGITS,
-    tasks=list(config.model.tasks.keys()),
+    tasks=[*config.model.tasks],
     pos_weights=[task.pos_weight for task in config.model.tasks.values()],
   )
 
diff --git a/projects/home/recap/model/entrypoint.py b/projects/home/recap/model/entrypoint.py
index 8f4d534..62dfdb6 100644
--- a/projects/home/recap/model/entrypoint.py
+++ b/projects/home/recap/model/entrypoint.py
@@ -149,7 +149,7 @@ def __init__(
           neg_downsampling_rate=data_config.tasks[task_name].neg_downsampling_rate,
         )
 
-    self._task_names = list(config.tasks.keys())
+    self._task_names = [*config.tasks]
     self._towers = torch.nn.ModuleDict(_towers)
     self._affine_maps = torch.nn.ModuleDict(_affine_maps)
     self._calibrators = torch.nn.ModuleDict(_calibrators)
diff --git a/projects/home/recap/optimizer/optimizer.py b/projects/home/recap/optimizer/optimizer.py
index c5b0cf1..a018ba7 100644
--- a/projects/home/recap/optimizer/optimizer.py
+++ b/projects/home/recap/optimizer/optimizer.py
@@ -40,7 +40,7 @@ def __init__(
   ):
     self.optimizer = optimizer
     self.lr_dict = lr_dict
-    self.group_names = list(self.lr_dict.keys())
+    self.group_names = [*self.lr_dict]
     self.emb_learning_rate = emb_learning_rate
 
     # We handle sparse LR scheduling separately, so only validate LR groups against dense param groups
@@ -146,7 +146,7 @@ def build_optimizer(
     )
   )
 
-  if not parameter_groups.keys() == all_learning_rates.keys():
+  if parameter_groups.keys() != all_learning_rates.keys():
     raise ValueError("Learning rates do not match optimizers")
 
   # If the optimiser is dense, model.fused_optimizer will be empty (but not None)
diff --git a/projects/twhin/data/edges.py b/projects/twhin/data/edges.py
index f7864b1..79b698f 100644
--- a/projects/twhin/data/edges.py
+++ b/projects/twhin/data/edges.py
@@ -38,7 +38,7 @@ def __init__(
 
     self.table_sizes = table_sizes
     self.num_tables = len(table_sizes)
-    self.table_names = list(table_sizes.keys())
+    self.table_names = [*table_sizes]
 
     self.relations = relations
     self.relations_t = torch.tensor(
diff --git a/projects/twhin/metrics.py b/projects/twhin/metrics.py
index 4296f80..e326e2e 100644
--- a/projects/twhin/metrics.py
+++ b/projects/twhin/metrics.py
@@ -7,7 +7,7 @@ def create_metrics(
   device: torch.device,
 ):
-  metrics = dict()
+  metrics = {}
   metrics.update(
     {
       "AUC": core_metrics.Auc(128),