diff --git a/README.md b/README.md index b10da0f23..ed91d6d7b 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,10 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser - Specify the learning rate and dim (rank) for each block. - See [Block-wise learning rates in LoRA](./docs/train_network_README-ja.md#階層別学習率) for details (Japanese only). +- An option `--disable_mmap_load_safetensors` has been added to disable memory mapping when loading the model's .safetensors file in SDXL. PR [#1266](https://github.com/kohya-ss/sd-scripts/pull/1266) Thanks to Zovjsra! + - This appears to speed up model file loading in some environments, such as WSL. + - Available in `sdxl_train.py`, `sdxl_train_network.py`, `sdxl_train_textual_inversion.py`, and `sdxl_train_control_net_lllite.py`. + - Fixed some bugs when using DeepSpeed. Related [#1247](https://github.com/kohya-ss/sd-scripts/pull/1247) - SDXL の学習時に Fused optimizer が使えるようになりました。PR [#1259](https://github.com/kohya-ss/sd-scripts/pull/1259) 2kpr 氏に感謝します。 @@ -193,6 +197,10 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser - ブロックごとに学習率および dim (rank) を指定することができます。 - 詳細は [LoRA の階層別学習率](./docs/train_network_README-ja.md#階層別学習率) をご覧ください。 +- SDXL でモデルの .safetensors を読み込む際にメモリマッピングを無効化するオプション `--disable_mmap_load_safetensors` が追加されました。PR [#1266](https://github.com/kohya-ss/sd-scripts/pull/1266) Zovjsra 氏に感謝します。 + - WSL 環境等でモデルファイルの読み込みが高速化されるようです。 + - `sdxl_train.py`、`sdxl_train_network.py`、`sdxl_train_textual_inversion.py`、`sdxl_train_control_net_lllite.py` で使用可能です。 + - DeepSpeed 使用時のいくつかのバグを修正しました。関連 [#1247](https://github.com/kohya-ss/sd-scripts/pull/1247) diff --git a/library/sdxl_model_util.py b/library/sdxl_model_util.py index e6fcb1f9c..4fad78a1c 100644 --- a/library/sdxl_model_util.py +++ b/library/sdxl_model_util.py @@ -9,8 +9,10 @@ from library import model_util from library import sdxl_original_unet from .utils import setup_logging + setup_logging() import logging + logger = 
logging.getLogger(__name__) VAE_SCALE_FACTOR = 0.13025 @@ -171,8 +173,8 @@ def load_models_from_sdxl_checkpoint(model_version, ckpt_path, map_location, dty # Load the state dict if model_util.is_safetensors(ckpt_path): checkpoint = None - if(disable_mmap): - state_dict = safetensors.torch.load(open(ckpt_path, 'rb').read()) + if disable_mmap: + state_dict = safetensors.torch.load(open(ckpt_path, "rb").read()) else: try: state_dict = load_file(ckpt_path, device=map_location) diff --git a/library/sdxl_train_util.py b/library/sdxl_train_util.py index 106c5b455..b74bea91a 100644 --- a/library/sdxl_train_util.py +++ b/library/sdxl_train_util.py @@ -5,6 +5,7 @@ import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate import init_empty_weights @@ -13,8 +14,10 @@ from library import model_util, sdxl_model_util, train_util, sdxl_original_unet from library.sdxl_lpw_stable_diffusion import SdxlStableDiffusionLongPromptWeightingPipeline from .utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) TOKENIZER1_PATH = "openai/clip-vit-large-patch14" @@ -44,7 +47,7 @@ def load_target_model(args, accelerator, model_version: str, weight_dtype): weight_dtype, accelerator.device if args.lowram else "cpu", model_dtype, - args.disable_mmap_load_safetensors + args.disable_mmap_load_safetensors, ) # work on low-ram device @@ -336,6 +339,7 @@ def add_sdxl_training_arguments(parser: argparse.ArgumentParser): parser.add_argument( "--disable_mmap_load_safetensors", action="store_true", + help="disable mmap load for safetensors. Speed up model loading in WSL environment / safetensorsのmmapロードを無効にする。WSL環境等でモデル読み込みを高速化できる", )