diff --git a/.github/workflows/typos.yml b/.github/workflows/typos.yml
index e8b06483f..c81ff3210 100644
--- a/.github/workflows/typos.yml
+++ b/.github/workflows/typos.yml
@@ -18,4 +18,4 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: typos-action
-        uses: crate-ci/typos@v1.19.0
+        uses: crate-ci/typos@v1.21.0
diff --git a/_typos.toml b/_typos.toml
index ae9e06b18..bbf7728f4 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -2,6 +2,7 @@
 # Instruction:  https://github.com/marketplace/actions/typos-action#getting-started
 
 [default.extend-identifiers]
+ddPn08="ddPn08"
 
 [default.extend-words]
 NIN="NIN"
@@ -27,6 +28,7 @@ rik="rik"
 koo="koo"
 yos="yos"
 wn="wn"
+hime="hime"
 
 
 [files]
diff --git a/library/ipex/attention.py b/library/ipex/attention.py
index d989ad53d..2bc62f65c 100644
--- a/library/ipex/attention.py
+++ b/library/ipex/attention.py
@@ -5,7 +5,7 @@
 
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
 
-# ARC GPUs can't allocate more than 4GB to a single block so we slice the attetion layers
+# ARC GPUs can't allocate more than 4GB to a single block so we slice the attention layers
 
 sdpa_slice_trigger_rate = float(os.environ.get('IPEX_SDPA_SLICE_TRIGGER_RATE', 4))
 attention_slice_rate = float(os.environ.get('IPEX_ATTENTION_SLICE_RATE', 4))
diff --git a/networks/control_net_lllite_for_train.py b/networks/control_net_lllite_for_train.py
index 65b3520cf..366451b7f 100644
--- a/networks/control_net_lllite_for_train.py
+++ b/networks/control_net_lllite_for_train.py
@@ -7,8 +7,10 @@
 import torch
 from library import sdxl_original_unet
 from library.utils import setup_logging
+
 setup_logging()
 import logging
+
 logger = logging.getLogger(__name__)
 
 # input_blocksに適用するかどうか / if True, input_blocks are not applied
@@ -103,19 +105,15 @@ def set_lllite(self, depth, cond_emb_dim, name, mlp_dim, dropout=None, multiplie
         add_lllite_modules(self, in_dim, depth, cond_emb_dim, mlp_dim)
 
         self.cond_image = None
-        self.cond_emb = None
 
     def set_cond_image(self, cond_image):
         self.cond_image = cond_image
-        self.cond_emb = None
 
     def forward(self, x):
         if not self.enabled:
             return super().forward(x)
 
-        if self.cond_emb is None:
-            self.cond_emb = self.lllite_conditioning1(self.cond_image)
-        cx = self.cond_emb
+        cx = self.lllite_conditioning1(self.cond_image)  # recompute on every forward (no cached cond_emb) to keep forward and backward compatible
 
         # reshape / b,c,h,w -> b,h*w,c
         n, c, h, w = cx.shape
@@ -159,9 +157,7 @@ def forward(self, x):  # , cond_image=None):
         if not self.enabled:
             return super().forward(x)
 
-        if self.cond_emb is None:
-            self.cond_emb = self.lllite_conditioning1(self.cond_image)
-        cx = self.cond_emb
+        cx = self.lllite_conditioning1(self.cond_image)
 
         cx = torch.cat([cx, self.down(x)], dim=1)
         cx = self.mid(cx)
diff --git a/sdxl_train_control_net_lllite.py b/sdxl_train_control_net_lllite.py
index 301310901..5ff060a9f 100644
--- a/sdxl_train_control_net_lllite.py
+++ b/sdxl_train_control_net_lllite.py
@@ -289,6 +289,9 @@ def train(args):
     # acceleratorがなんかよろしくやってくれるらしい
     unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
 
+    if isinstance(unet, DDP):
+        unet._set_static_graph()  # avoid the DDP error raised when the same parameter is used more than once
+
     if args.gradient_checkpointing:
         unet.train()  # according to TI example in Diffusers, train is required -> これオリジナルのU-Netしたので本当は外せる
     else: