Merge branch 'release/0.3.2'

ControlNet · Aug 2, 2023 · 93ef891
2 parents f101fe5 + b4b094d
commit 93ef891
Show file tree

Hide file tree

Showing 7 changed files with 52 additions and 5 deletions.
diff --git a/src/tensorneko/callback/gpu_stats_logger.py b/src/tensorneko/callback/gpu_stats_logger.py
@@ -25,7 +25,7 @@ def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> No
                 f"gpu{gpu.index}_utilization": gpu.utilization / 100,
                 f"gpu{gpu.index}_power_draw": float(gpu.power_draw),
                 f"gpu{gpu.index}_power_percentage": gpu.power_draw / gpu.power_limit,
-                f"gpu{gpu.index}_fan_speed": float(gpu.fan_speed),
+                f"gpu{gpu.index}_fan_speed": float(gpu.fan_speed) if gpu.fan_speed is not None else 0.,
             }
             pl_module.logger.log_metrics(logged_info, step=trainer.global_step)
             pl_module.log_dict(logged_info, logger=False, sync_dist=pl_module.distributed)
diff --git a/src/tensorneko/neko_model.py b/src/tensorneko/neko_model.py
@@ -243,3 +243,7 @@ def log_image(self, name: str, image: torch.Tensor) -> None:
             return
 
         self.logger.experiment.add_image(name, torch.clip(image, 0, 1), self.trainer.global_step)
+
+    def on_train_start(self) -> None:
+        """Log the model graph to tensorboard when the input shape is set"""
+        self.logger.log_graph(self, self.example_input_array)
diff --git a/src/tensorneko/neko_trainer.py b/src/tensorneko/neko_trainer.py
@@ -215,8 +215,7 @@ def __init__(self,
         self.has_no_logger = logger is None
 
         self.logger_train = TensorBoardLogger(save_dir=self.default_root_dir, name="logs",
-            version=os.path.join(self.log_name, "train"), log_graph=False
-            # TODO: Fix log_Graph
+            version=os.path.join(self.log_name, "train"), log_graph=True
         ) if self.has_no_logger is not None else None
         self.logger_val = TensorBoardLogger(save_dir=self.default_root_dir, name="logs",
             version=os.path.join(self.log_name, "val"), log_graph=False

diff --git a/src/tensorneko_util/backend/audio_lib.py b/src/tensorneko_util/backend/audio_lib.py
@@ -1,9 +1,12 @@
 from enum import Enum
 from typing import Optional
 
+from .visual_lib import VisualLib
+
 
 class AudioLib(Enum):
     PYTORCH = 1
+    FFMPEG = 2
 
     _is_torchaudio_available: Optional[bool] = None
 
@@ -16,3 +19,7 @@ def pytorch_available(cls) -> bool:
             except ImportError:
                 cls._is_torchaudio_available = False
         return cls._is_torchaudio_available
+
+    @classmethod
+    def ffmpeg_available(cls) -> bool:
+        return VisualLib.ffmpeg_available()
diff --git a/src/tensorneko_util/backend/visual_lib.py b/src/tensorneko_util/backend/visual_lib.py
@@ -56,7 +56,15 @@ def ffmpeg_available() -> bool:
         if _VisualLibAvailability.is_ffmpeg_available is None:
             ffmpeg_available = subprocess.run('ffmpeg -version', stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL, shell=True).returncode == 0
-            _VisualLibAvailability.is_ffmpeg_available = ffmpeg_available
+            if ffmpeg_available:
+                try:
+                    import ffmpeg
+                    _VisualLibAvailability.is_ffmpeg_available = True
+                except ImportError:
+                    _VisualLibAvailability.is_ffmpeg_available = False
+            else:
+                _VisualLibAvailability.is_ffmpeg_available = False
+
         return _VisualLibAvailability.is_ffmpeg_available
 
     @staticmethod

diff --git a/src/tensorneko_util/io/audio/audio_reader.py b/src/tensorneko_util/io/audio/audio_reader.py
@@ -1,5 +1,7 @@
 from typing import Optional
 
+import numpy as np
+
 from .audio_data import AudioData
 from .._default_backends import _default_audio_io_backend
 from ...backend.audio_lib import AudioLib
@@ -30,6 +32,33 @@ def of(path: str, channel_first: bool = True, backend: Optional[AudioLib] = None
                 raise ValueError("Torchaudio is not available.")
             import torchaudio
             return AudioData(*torchaudio.load(path, channels_first=channel_first))
+        elif backend == AudioLib.FFMPEG:
+            if not AudioLib.ffmpeg_available():
+                raise ValueError("FFmpeg is not available.")
+            import ffmpeg
+
+            for stream in ffmpeg.probe(path)["streams"]:
+                if stream["codec_type"] == "audio":
+                    sample_rate = int(stream["sample_rate"])
+                    channel = int(stream["channels"])
+                    break
+            else:
+                raise RuntimeError("No audio stream found.")
+
+            try:
+                out, _ = (
+                    ffmpeg.input(path, threads=0)
+                    .output("-", format="s16le", acodec="pcm_s16le")
+                    .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+                )
+            except ffmpeg.Error as e:
+                raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+
+            arr = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+            arr = arr.reshape(-1, channel)
+            arr = arr.T if channel_first else arr
+
+            return AudioData(arr, sample_rate)
         else:
             raise ValueError("Unknown audio library: {}".format(backend))
 

diff --git a/version.txt b/version.txt
@@ -1 +1 @@
-0.3.1
+0.3.2