From a92b859e4090a1d33576036a688a44bb6b5d55b7 Mon Sep 17 00:00:00 2001 From: Seunghoon Lee Date: Sun, 5 May 2024 19:56:08 +0900 Subject: [PATCH] Support olive-ai 0.5.x. --- configs/olive/sd/text_encoder.json | 25 +- configs/olive/sd/unet.json | 25 +- configs/olive/sd/vae_decoder.json | 25 +- configs/olive/sd/vae_encoder.json | 25 +- configs/olive/sdxl/text_encoder.json | 17 +- configs/olive/sdxl/text_encoder_2.json | 19 +- configs/olive/sdxl/unet.json | 17 +- configs/olive/sdxl/vae_decoder.json | 17 +- configs/olive/sdxl/vae_encoder.json | 17 +- modules/devices.py | 3 + modules/launch_utils.py | 3 - modules/olive_script.py | 8 +- modules/onnx_impl/__init__.py | 119 ++--- modules/onnx_impl/execution_providers.py | 24 +- modules/onnx_impl/pipelines/__init__.py | 128 +++-- ...nx_stable_diffusion_xl_img2img_pipeline.py | 1 + .../onnx_stable_diffusion_xl_pipeline.py | 1 + modules/onnx_impl/ui.py | 444 ++++++++---------- modules/onnx_impl/utils.py | 4 +- modules/shared_init.py | 2 +- 20 files changed, 414 insertions(+), 510 deletions(-) diff --git a/configs/olive/sd/text_encoder.json b/configs/olive/sd/text_encoder.json index 77722d99ae9..22299cbc583 100644 --- a/configs/olive/sd/text_encoder.json +++ b/configs/olive/sd/text_encoder.json @@ -16,7 +16,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -38,7 +45,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -49,7 +55,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -78,12 +83,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -94,7 +100,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -117,17 +122,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/unet.json b/configs/olive/sd/unet.json index 5e08ef5de1b..03922652e18 100644 --- a/configs/olive/sd/unet.json +++ b/configs/olive/sd/unet.json @@ -33,7 +33,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -55,7 +62,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -66,7 +72,6 @@ }, "optimize_DmlExecutionProvider": { "type": 
"OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -95,12 +100,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -111,7 +117,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -134,17 +139,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "unet", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/vae_decoder.json b/configs/olive/sd/vae_decoder.json index 14d8d34de33..8358745c49b 100644 --- a/configs/olive/sd/vae_decoder.json +++ b/configs/olive/sd/vae_decoder.json @@ -23,7 +23,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -45,7 +52,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -56,7 +62,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -85,12 +90,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -101,7 +107,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -124,17 +129,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_decoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/vae_encoder.json b/configs/olive/sd/vae_encoder.json index c5643348c74..3ce9091fa02 100644 --- a/configs/olive/sd/vae_encoder.json +++ b/configs/olive/sd/vae_encoder.json @@ -23,7 +23,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -45,7 +52,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": 
"clip", "opt_level": 0, @@ -56,7 +62,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -85,12 +90,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -101,7 +107,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -124,17 +129,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/text_encoder.json b/configs/olive/sdxl/text_encoder.json index faf9a6621de..16408587490 100644 --- a/configs/olive/sdxl/text_encoder.json +++ b/configs/olive/sdxl/text_encoder.json @@ -50,7 +50,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -73,7 +78,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -109,7 +113,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -120,7 +123,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -132,17 +134,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/text_encoder_2.json b/configs/olive/sdxl/text_encoder_2.json index 5532657f7a1..aa13427361f 100644 --- a/configs/olive/sdxl/text_encoder_2.json +++ b/configs/olive/sdxl/text_encoder_2.json @@ -46,7 +46,7 @@ ], "dynamic_axes": { "input_ids": { "0": "batch_size", "1": "sequence_length" }, - "text_embeds": { "0": "batch_size", "1": "sequence_length" }, + "text_embeds": { "0": "batch_size" }, "last_hidden_state": { "0": "batch_size", "1": "sequence_length" }, "hidden_states.0": { "0": "batch_size", "1": "sequence_length" }, "hidden_states.1": { "0": "batch_size", "1": "sequence_length" }, @@ -90,7 +90,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -113,7 +118,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": 
"OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -149,7 +153,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -160,7 +163,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -172,17 +174,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder_2", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/unet.json b/configs/olive/sdxl/unet.json index 874002a96f1..28d86902302 100644 --- a/configs/olive/sdxl/unet.json +++ b/configs/olive/sdxl/unet.json @@ -40,7 +40,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -63,7 +68,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -99,7 +103,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -110,7 +113,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -122,17 +124,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "unet", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/vae_decoder.json b/configs/olive/sdxl/vae_decoder.json index 0074e3dd681..1f6823e5040 100644 --- a/configs/olive/sdxl/vae_decoder.json +++ b/configs/olive/sdxl/vae_decoder.json @@ -30,7 +30,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -53,7 +58,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -111,7 +115,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -121,7 +124,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -132,17 +134,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", 
"output_name": "vae_decoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/vae_encoder.json b/configs/olive/sdxl/vae_encoder.json index 540c3a9ee72..e5d4f844e88 100644 --- a/configs/olive/sdxl/vae_encoder.json +++ b/configs/olive/sdxl/vae_encoder.json @@ -30,7 +30,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -53,7 +58,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -89,7 +93,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -100,7 +103,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -112,17 +114,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/modules/devices.py b/modules/devices.py index 90e8f6540e7..8ba7b7c6871 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -225,6 +225,9 @@ def autocast(disable=False): if fp8 and dtype_inference == torch.float32: return manual_cast(dtype) + if device == cpu: + return contextlib.nullcontext() + if dtype == torch.float32 or dtype_inference == torch.float32: return contextlib.nullcontext() diff --git a/modules/launch_utils.py b/modules/launch_utils.py index e830d764a79..8768adfd4b9 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -654,9 +654,6 @@ def prepare_environment(): if not is_installed("onnxruntime"): run_pip("install onnxruntime", "onnxruntime") - from modules.onnx_impl import initialize_olive - initialize_olive() - if not args.skip_install: run_extensions_installers(settings_file=args.ui_settings_file) diff --git a/modules/olive_script.py b/modules/olive_script.py index c7475f84dc8..e5366dfffd1 100644 --- a/modules/olive_script.py +++ b/modules/olive_script.py @@ -18,7 +18,7 @@ class ENVStore: } def __getattr__(self, name: str): - value = os.environ.get(f"SDDML_OLIVE_{name}", None) + value = os.environ.get(f"SDAMDGPU_OLIVE_{name}", None) if value is None: return ty = self.__class__.__annotations__[name] @@ -30,12 +30,12 @@ def __setattr__(self, name: str, value) -> None: return ty = self.__class__.__annotations__[name] serialize = self.__SERIALIZER[ty] - os.environ[f"SDDML_OLIVE_{name}"] = serialize(value) + os.environ[f"SDAMDGPU_OLIVE_{name}"] = serialize(value) def __delattr__(self, name: str) -> None: if name not in self.__class__.__annotations__: return - key = f"SDDML_OLIVE_{name}" + key = f"SDAMDGPU_OLIVE_{name}" if key not in os.environ: return os.environ.pop(key) @@ -47,7 +47,7 @@ class OliveOptimizerConfig(ENVStore): is_sdxl: bool vae: str - vae_sdxl_fp16_fix: bool = False + vae_sdxl_fp16_fix: bool width: int height: int diff --git a/modules/onnx_impl/__init__.py b/modules/onnx_impl/__init__.py index 
f2bb59c56a3..9d443d17ece 100644 --- a/modules/onnx_impl/__init__.py +++ b/modules/onnx_impl/__init__.py @@ -1,4 +1,3 @@ -import os from typing import Any, Dict, Callable, Optional import numpy as np import torch @@ -11,7 +10,6 @@ initialized = False -run_olive_workflow = None class DynamicSessionOptions(ort.SessionOptions): @@ -19,7 +17,6 @@ class DynamicSessionOptions(ort.SessionOptions): def __init__(self): super().__init__() - self.enable_mem_pattern = False @classmethod @@ -54,6 +51,9 @@ class TorchCompatibleModule: device = torch.device("cpu") dtype = torch.float32 + def named_modules(self): # dummy + return () + def to(self, *_, **__): raise NotImplementedError @@ -80,7 +80,6 @@ def to(self, *args, **kwargs): device = extract_device(args, kwargs) if device is not None and device.type != "cpu": from .execution_providers import TORCH_DEVICE_TO_EP - provider = TORCH_DEVICE_TO_EP[device.type] if device.type in TORCH_DEVICE_TO_EP else self.provider return OnnxRuntimeModel.load_model(self.path, provider, DynamicSessionOptions.from_sess_options(self.sess_options)) return self @@ -89,9 +88,6 @@ def to(self, *args, **kwargs): class OnnxRuntimeModel(TorchCompatibleModule, diffusers.OnnxRuntimeModel): config = {} # dummy - def named_modules(self): # dummy - return () - def to(self, *args, **kwargs): from modules.onnx_impl.utils import extract_device, move_inference_session @@ -103,17 +99,17 @@ def to(self, *args, **kwargs): class VAEConfig: - DEFAULTS = { - "scaling_factor": 0.18215, - } - + DEFAULTS = { "scaling_factor": 0.18215 } config: Dict def __init__(self, config: Dict): self.config = config def __getattr__(self, key): - return self.config.get(key, VAEConfig.DEFAULTS[key]) + return self.config.get(key, VAEConfig.DEFAULTS.get(key, None)) + + def get(self, key, default): + return self.config.get(key, VAEConfig.DEFAULTS.get(key, default)) class VAE(TorchCompatibleModule): @@ -154,10 +150,8 @@ def to(self, *args, **kwargs): def check_parameters_changed(p, refiner_enabled: bool): from modules import shared, sd_models - if shared.sd_model.__class__.__name__ == "OnnxRawPipeline" or not shared.sd_model.__class__.__name__.startswith("Onnx"): return shared.sd_model - compile_height = p.height compile_width = p.width if (shared.compiled_model_state is None or @@ -175,17 +169,14 @@ def check_parameters_changed(p, refiner_enabled: bool): shared.compiled_model_state.height = compile_height shared.compiled_model_state.width = compile_width shared.compiled_model_state.batch_size = p.batch_size - return shared.sd_model def preprocess_pipeline(p): from modules import shared, sd_models - if "ONNX" not in shared.opts.diffusers_pipeline: shared.log.warning(f"Unsupported pipeline for 'olive-ai' compile backend: {shared.opts.diffusers_pipeline}. 
You should select one of the ONNX pipelines.") return shared.sd_model - if hasattr(shared.sd_model, "preprocess"): shared.sd_model = shared.sd_model.preprocess(p) if hasattr(shared.sd_refiner, "preprocess"): @@ -195,7 +186,6 @@ def preprocess_pipeline(p): if shared.opts.onnx_unload_base: sd_models.reload_model_weights() shared.sd_model = shared.sd_model.preprocess(p) - return shared.sd_model @@ -211,82 +201,51 @@ def jsonable_encoder(obj: Any, *args, **kwargs): return fastapi_jsonable_encoder(obj, *args, **kwargs) -def initialize(): +def initialize_onnx(): global initialized # pylint: disable=global-statement - if initialized: return - from modules import devices - from modules.paths import models_path from modules.shared import opts - from .execution_providers import ExecutionProvider, TORCH_DEVICE_TO_EP, available_execution_providers - - onnx_dir = os.path.join(models_path, "ONNX") - if not os.path.isdir(onnx_dir): - os.mkdir(onnx_dir) - - if devices.backend == "rocm": - TORCH_DEVICE_TO_EP["cuda"] = ExecutionProvider.ROCm - - from .pipelines.onnx_stable_diffusion_pipeline import OnnxStableDiffusionPipeline - from .pipelines.onnx_stable_diffusion_img2img_pipeline import OnnxStableDiffusionImg2ImgPipeline - from .pipelines.onnx_stable_diffusion_inpaint_pipeline import OnnxStableDiffusionInpaintPipeline - from .pipelines.onnx_stable_diffusion_upscale_pipeline import OnnxStableDiffusionUpscalePipeline - from .pipelines.onnx_stable_diffusion_xl_pipeline import OnnxStableDiffusionXLPipeline - from .pipelines.onnx_stable_diffusion_xl_img2img_pipeline import OnnxStableDiffusionXLImg2ImgPipeline + try: # may fail on onnx import + from .execution_providers import ExecutionProvider, TORCH_DEVICE_TO_EP, available_execution_providers + if devices.backend == "rocm": + TORCH_DEVICE_TO_EP["cuda"] = ExecutionProvider.ROCm + from .pipelines.onnx_stable_diffusion_pipeline import OnnxStableDiffusionPipeline + from .pipelines.onnx_stable_diffusion_img2img_pipeline import OnnxStableDiffusionImg2ImgPipeline + from .pipelines.onnx_stable_diffusion_inpaint_pipeline import OnnxStableDiffusionInpaintPipeline + from .pipelines.onnx_stable_diffusion_upscale_pipeline import OnnxStableDiffusionUpscalePipeline + from .pipelines.onnx_stable_diffusion_xl_pipeline import OnnxStableDiffusionXLPipeline + from .pipelines.onnx_stable_diffusion_xl_img2img_pipeline import OnnxStableDiffusionXLImg2ImgPipeline - # OnnxRuntimeModel Hijack. - OnnxRuntimeModel.__module__ = 'diffusers' - diffusers.OnnxRuntimeModel = OnnxRuntimeModel + OnnxRuntimeModel.__module__ = 'diffusers' # OnnxRuntimeModel Hijack. 
+ diffusers.OnnxRuntimeModel = OnnxRuntimeModel - diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline - diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionPipeline + diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline + diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionPipeline - diffusers.OnnxStableDiffusionImg2ImgPipeline = OnnxStableDiffusionImg2ImgPipeline - diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionImg2ImgPipeline + diffusers.OnnxStableDiffusionImg2ImgPipeline = OnnxStableDiffusionImg2ImgPipeline + diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionImg2ImgPipeline - diffusers.OnnxStableDiffusionInpaintPipeline = OnnxStableDiffusionInpaintPipeline - diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionInpaintPipeline + diffusers.OnnxStableDiffusionInpaintPipeline = OnnxStableDiffusionInpaintPipeline + diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionInpaintPipeline - diffusers.OnnxStableDiffusionUpscalePipeline = OnnxStableDiffusionUpscalePipeline + diffusers.OnnxStableDiffusionUpscalePipeline = OnnxStableDiffusionUpscalePipeline - diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline - diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLPipeline + diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline + diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLPipeline - diffusers.OnnxStableDiffusionXLImg2ImgPipeline = OnnxStableDiffusionXLImg2ImgPipeline - diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLImg2ImgPipeline + diffusers.OnnxStableDiffusionXLImg2ImgPipeline = OnnxStableDiffusionXLImg2ImgPipeline + diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLImg2ImgPipeline - # Huggingface model compatibility - diffusers.ORTStableDiffusionXLPipeline = diffusers.OnnxStableDiffusionXLPipeline - diffusers.ORTStableDiffusionXLImg2ImgPipeline = diffusers.OnnxStableDiffusionXLImg2ImgPipeline + diffusers.ORTStableDiffusionXLPipeline = diffusers.OnnxStableDiffusionXLPipeline # Huggingface model compatibility + diffusers.ORTStableDiffusionXLImg2ImgPipeline = diffusers.OnnxStableDiffusionXLImg2ImgPipeline - optimum.onnxruntime.modeling_diffusion._ORTDiffusionModelPart.to = ORTDiffusionModelPart_to # pylint: disable=protected-access + optimum.onnxruntime.modeling_diffusion._ORTDiffusionModelPart.to = ORTDiffusionModelPart_to # pylint: disable=protected-access - fastapi_encoders.jsonable_encoder = jsonable_encoder + fastapi_encoders.jsonable_encoder = jsonable_encoder - print(f'ONNX: selected={opts.onnx_execution_provider}, available={available_execution_providers}') - - initialized = True - - -def initialize_olive(): - global run_olive_workflow # pylint: disable=global-statement - from modules.launch_utils import is_installed - if not is_installed("olive-ai"): - return - import sys - import 
importlib - orig_sys_path = sys.path - venv_dir = os.environ.get("VENV_DIR", os.path.join(os.getcwd(), 'venv')) - try: - spec = importlib.util.find_spec('onnxruntime.transformers') - sys.path = [d for d in spec.submodule_search_locations + sys.path if sys.path[1] not in d or venv_dir in d] - from onnxruntime.transformers import convert_generation # noqa: F401 - spec = importlib.util.find_spec('olive') - sys.path = spec.submodule_search_locations + sys.path - run_olive_workflow = importlib.import_module('olive.workflows').run + print(f'ONNX: version={ort.__version__} provider={opts.onnx_execution_provider}, available={available_execution_providers}') except Exception as e: - run_olive_workflow = None - print(f'Olive: Failed to load olive-ai: {e}') - sys.path = orig_sys_path + print(f'ONNX failed to initialize: {e}') + initialized = True diff --git a/modules/onnx_impl/execution_providers.py b/modules/onnx_impl/execution_providers.py index 27116d165d3..8cb7cd22562 100644 --- a/modules/onnx_impl/execution_providers.py +++ b/modules/onnx_impl/execution_providers.py @@ -2,6 +2,7 @@ from enum import Enum from typing import Tuple, List import onnxruntime as ort +from modules import devices class ExecutionProvider(str, Enum): @@ -20,7 +21,6 @@ class ExecutionProvider(str, Enum): ExecutionProvider.CUDA: "gpu-cuda", # test required ExecutionProvider.ROCm: "gpu-rocm", # test required ExecutionProvider.MIGraphX: "gpu-migraphx", # test required - ExecutionProvider.OpenVINO: "gpu-openvino??", # test required } TORCH_DEVICE_TO_EP = { "cpu": ExecutionProvider.CPU, @@ -31,8 +31,6 @@ class ExecutionProvider(str, Enum): def get_default_execution_provider() -> ExecutionProvider: - from modules import devices - if devices.backend == "cpu": return ExecutionProvider.CPU elif devices.backend == "directml": @@ -40,10 +38,7 @@ def get_default_execution_provider() -> ExecutionProvider: elif devices.backend == "cuda": return ExecutionProvider.CUDA elif devices.backend == "rocm": - if ExecutionProvider.ROCm in available_execution_providers: - return ExecutionProvider.ROCm - else: - return ExecutionProvider.CPU + return ExecutionProvider.ROCm elif devices.backend == "ipex" or devices.backend == "openvino": return ExecutionProvider.OpenVINO return ExecutionProvider.CPU @@ -51,30 +46,21 @@ def get_default_execution_provider() -> ExecutionProvider: def get_execution_provider_options(): from modules.shared import cmd_opts, opts - - execution_provider_options = { - "device_id": int(cmd_opts.device_id or 0), - } - + execution_provider_options = { "device_id": int(cmd_opts.device_id or 0) } if opts.onnx_execution_provider == ExecutionProvider.ROCm: if ExecutionProvider.ROCm in available_execution_providers: execution_provider_options["tunable_op_enable"] = 1 execution_provider_options["tunable_op_tuning_enable"] = 1 - elif opts.onnx_execution_provider == ExecutionProvider.OpenVINO: - execution_provider_options["device_type"] = "GPU.0" - del execution_provider_options["device_id"] - return execution_provider_options def get_provider() -> Tuple: from modules.shared import opts - return (opts.onnx_execution_provider, get_execution_provider_options(),) def install_execution_provider(ep: ExecutionProvider): - from modules.launch_utils import is_installed, run_pip, run_pip_uninstall, get_onnxruntime_source_for_rocm + from modules.launch_utils import is_installed, run_pip, run_pip_uninstall if is_installed("onnxruntime"): run_pip_uninstall("onnxruntime") @@ -98,7 +84,7 @@ def install_execution_provider(ep: ExecutionProvider): 
print("ROCMExecutionProvider is not supported on Windows.") return - packages.append(get_onnxruntime_source_for_rocm(None)) + packages.append("--pre onnxruntime-training --index-url https://pypi.lsh.sh/60 --extra-index-url https://pypi.org/simple") elif ep == ExecutionProvider.OpenVINO: if is_installed("openvino"): run_pip_uninstall("openvino") diff --git a/modules/onnx_impl/pipelines/__init__.py b/modules/onnx_impl/pipelines/__init__.py index e4b8915a8c7..f2983dc9dd1 100644 --- a/modules/onnx_impl/pipelines/__init__.py +++ b/modules/onnx_impl/pipelines/__init__.py @@ -4,7 +4,6 @@ import tempfile from typing import Type, Tuple, List, Any, Dict from packaging import version -import onnx import torch import diffusers import onnxruntime as ort @@ -15,7 +14,7 @@ from modules.sd_models import CheckpointInfo from modules.processing import StableDiffusionProcessing from modules.olive_script import config -from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE, run_olive_workflow +from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE from modules.onnx_impl.utils import extract_device, move_inference_session, check_diffusers_cache, check_pipeline_sdxl, check_cache_onnx, load_init_dict, load_submodel, load_submodels, patch_kwargs, load_pipeline, get_base_constructor, get_io_config from modules.onnx_impl.execution_providers import ExecutionProvider, EP_TO_NAME, get_provider @@ -72,6 +71,10 @@ def to(self, *args, **kwargs): print(f"Component device/dtype conversion failed: module={name} args={args}, kwargs={kwargs}") return self + @property + def components(self): + return {} + @classmethod def from_pretrained(cls, pretrained_model_name_or_path, **_): # pylint: disable=arguments-differ return OnnxRawPipeline( @@ -170,7 +173,7 @@ def convert(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLik in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt") ) - from modules import olive_script as olv + from modules import olive_script as script for submodel in submodels: destination = os.path.join(out_dir, submodel) @@ -178,8 +181,8 @@ def convert(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLik if not os.path.isdir(destination): os.mkdir(destination) - model = getattr(olv, f"{submodel}_load")(in_dir) - sample = getattr(olv, f"{submodel}_conversion_inputs")(None) + model = getattr(script, f"{submodel}_load")(in_dir) + sample = getattr(script, f"{submodel}_conversion_inputs")(None) with tempfile.TemporaryDirectory(prefix="onnx_conversion") as temp_dir: temp_path = os.path.join(temp_dir, "model.onnx") torch.onnx.export( @@ -228,12 +231,8 @@ def convert(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLik json.dump(model_index, file) def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike): - ort.set_default_logger_severity(4) - - try: - from olive.model import ONNXModel # olive-ai==0.4.0 - except ImportError: - from olive.model import ONNXModelHandler as ONNXModel # olive-ai==0.5.0 + from olive.model import ONNXModelHandler + from olive.workflows import run as run_workflows shutil.rmtree("cache", ignore_errors=True) shutil.rmtree("footprints", ignore_errors=True) @@ -246,7 +245,7 @@ def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathL optimized_model_paths = {} for submodel in submodels: - print(f"\nProcessing {submodel}") + log.info(f"\nProcessing {submodel}") with open(os.path.join(sd_configs_path, "olive", 'sdxl' 
if self._is_sdxl else 'sd', f"{submodel}.json"), "r", encoding="utf-8") as config_file: olive_config: Dict[str, Dict[str, Dict]] = json.load(config_file) @@ -255,19 +254,20 @@ def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathL for i in range(len(flow)): flow[i] = flow[i].replace("AutoExecutionProvider", shared.opts.onnx_execution_provider) olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx")) - olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] + olive_config["systems"]["local_system"]["config"]["accelerators"][0]["device"] = "cpu" if shared.opts.onnx_execution_provider == ExecutionProvider.CPU else "gpu" # TODO: npu + olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = [shared.opts.onnx_execution_provider] for pass_key in olive_config["passes"]: if olive_config["passes"][pass_key]["type"] == "OrtTransformersOptimization": float16 = shared.opts.olive_float16 and not (submodel == "vae_encoder" and shared.opts.olive_vae_encoder_float32) olive_config["passes"][pass_key]["config"]["float16"] = float16 + if not float16: + olive_config["passes"][pass_key]["config"]["force_fp16_inputs"] = {} if shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm: - if version.parse(ort.__version__) < version.parse("1.17.0"): - olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False} if float16: olive_config["passes"][pass_key]["config"]["keep_io_types"] = False - run_olive_workflow(olive_config) + run_workflows(olive_config) with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r", encoding="utf-8") as footprint_file: footprints = json.load(footprint_file) @@ -278,11 +278,11 @@ def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathL assert processor_final_pass_footprint, "Failed to optimize model" - optimized_model_paths[submodel] = ONNXModel( + optimized_model_paths[submodel] = ONNXModelHandler( **processor_final_pass_footprint["model_config"]["config"] ).model_path - print(f"Olive: Successfully processed model: submodel={submodel}") + log.info(f"Olive: Successfully processed model: submodel={submodel}") for submodel in submodels: src_path = optimized_model_paths[submodel] @@ -343,6 +343,7 @@ def preprocess(self, p: StableDiffusionProcessing): config.vae = os.path.join(models_path, "VAE", shared.opts.sd_vae) if not os.path.isfile(config.vae): del config.vae + config.vae_sdxl_fp16_fix = self._is_sdxl and shared.opts.diffusers_vae_upcast == "false" config.width = p.width config.height = p.height @@ -379,54 +380,51 @@ def preprocess(self, p: StableDiffusionProcessing): } in_dir = out_dir - if shared.opts.olive_enable: - if run_olive_workflow is None: - print('Olive: Skipping model compilation because olive-ai was loaded unsuccessfully.') + if shared.opts.cuda_compile_backend == "olive-ai": + submodels_for_olive = [] + + if "Text Encoder" in shared.opts.cuda_compile: + if not self.is_refiner: + submodels_for_olive.append("text_encoder") + if self._is_sdxl: + submodels_for_olive.append("text_encoder_2") + if "Model" in shared.opts.cuda_compile: + submodels_for_olive.append("unet") + if "VAE" in shared.opts.cuda_compile: + submodels_for_olive.append("vae_encoder") + submodels_for_olive.append("vae_decoder") + + if len(submodels_for_olive) == 0: 
+ print("Olive: Skipping olive run.") else: - submodels_for_olive = [] - - if "Text Encoder" in shared.opts.olive_submodels: - if not self.is_refiner: - submodels_for_olive.append("text_encoder") - if self._is_sdxl: - submodels_for_olive.append("text_encoder_2") - if "Model" in shared.opts.olive_submodels: - submodels_for_olive.append("unet") - if "VAE" in shared.opts.olive_submodels: - submodels_for_olive.append("vae_encoder") - submodels_for_olive.append("vae_decoder") - - if len(submodels_for_olive) == 0: - print("Olive: Skipping olive run.") - else: - print("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.") - - out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h") - if not os.path.isdir(out_dir): # check the model is already optimized (cached) - if not shared.opts.olive_cache_optimized: - out_dir = shared.opts.onnx_temp_dir - - if p.width != p.height: - print("Olive: Different width and height are detected. The quality of the result is not guaranteed.") - - if shared.opts.olive_static_dims: - sess_options = DynamicSessionOptions() - sess_options.enable_static_dims({ - "is_sdxl": self._is_sdxl, - "is_refiner": self.is_refiner, - - "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2, - "height": p.height, - "width": p.width, - }) - kwargs["sess_options"] = sess_options - - try: - self.run_olive(submodels_for_olive, in_dir, out_dir) - except Exception as e: - print(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}") - shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True) - shutil.rmtree(out_dir, ignore_errors=True) + print("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.") + + out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h") + if not os.path.isdir(out_dir): # check the model is already optimized (cached) + if not shared.opts.olive_cache_optimized: + out_dir = shared.opts.onnx_temp_dir + + if p.width != p.height: + print("Olive: Different width and height are detected. 
The quality of the result is not guaranteed.") + + if shared.opts.olive_static_dims: + sess_options = DynamicSessionOptions() + sess_options.enable_static_dims({ + "is_sdxl": self._is_sdxl, + "is_refiner": self.is_refiner, + + "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2, + "height": p.height, + "width": p.width, + }) + kwargs["sess_options"] = sess_options + + try: + self.run_olive(submodels_for_olive, in_dir, out_dir) + except Exception as e: + print(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}") + shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True) + shutil.rmtree(out_dir, ignore_errors=True) pipeline = self.derive_properties(load_pipeline(self.constructor, out_dir, **kwargs)) diff --git a/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_img2img_pipeline.py b/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_img2img_pipeline.py index 58a41d81c77..5606da06eee 100644 --- a/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_img2img_pipeline.py +++ b/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_img2img_pipeline.py @@ -29,6 +29,7 @@ def __init__( ): optimum.onnxruntime.ORTStableDiffusionXLImg2ImgPipeline.__init__(self, vae_decoder, text_encoder, unet, config, tokenizer, scheduler, feature_extractor, vae_encoder, text_encoder_2, tokenizer_2, use_io_binding, model_save_dir, add_watermarker) super().__init__() + del self.image_processor # This image processor requires np array. In order to share same workflow with non-XL pipelines, delete it. def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, generator=None): batch_size = batch_size * num_images_per_prompt diff --git a/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_pipeline.py b/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_pipeline.py index d77dd6853e3..452e4f892ef 100644 --- a/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_pipeline.py +++ b/modules/onnx_impl/pipelines/onnx_stable_diffusion_xl_pipeline.py @@ -27,6 +27,7 @@ def __init__( ): optimum.onnxruntime.ORTStableDiffusionXLPipeline.__init__(self, vae_decoder, text_encoder, unet, config, tokenizer, scheduler, feature_extractor, vae_encoder, text_encoder_2, tokenizer_2, use_io_binding, model_save_dir, add_watermarker) super().__init__() + del self.image_processor # This image processor requires np array. In order to share same workflow with non-XL pipelines, delete it. 
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, generator, latents=None): return prepare_latents(self.scheduler.init_noise_sigma, batch_size, height, width, dtype, generator, latents, num_channels_latents, self.vae_scale_factor) diff --git a/modules/onnx_impl/ui.py b/modules/onnx_impl/ui.py index ff4de86b11f..ed3f0080e53 100644 --- a/modules/onnx_impl/ui.py +++ b/modules/onnx_impl/ui.py @@ -21,246 +21,216 @@ def create_ui(): from .utils import check_diffusers_cache with gr.Blocks(analytics_enabled=False) as ui: - with gr.Row(): - with gr.Tabs(elem_id="tabs_onnx"): - with gr.TabItem("Manage execution providers", id="onnxep"): - gr.Markdown("Uninstall existing execution provider and install another one.") - - choices = [] - - for ep in ExecutionProvider: - choices.append(ep) - - ep_default = None - if cmd_opts.use_directml: - ep_default = ExecutionProvider.DirectML - elif cmd_opts.use_cuda: - ep_default = ExecutionProvider.CUDA - elif cmd_opts.use_rocm: - ep_default = ExecutionProvider.ROCm - elif cmd_opts.use_openvino: - ep_default = ExecutionProvider.OpenVINO - - ep_checkbox = gr.Radio(label="Execution provider", value=ep_default, choices=choices) - ep_install = gr.Button(value="Install") - gr.Markdown("**Warning! If you are trying to reinstall, it may not work due to permission issue.**") - - ep_install.click(fn=install_execution_provider, inputs=ep_checkbox) - - if opts.olive_enable: + with gr.Tabs(elem_id="tabs_onnx"): + with gr.TabItem("Provider", id="onnxep"): + gr.Markdown("Install ONNX execution provider") + ep_default = None + if cmd_opts.use_directml: + ep_default = ExecutionProvider.DirectML + elif cmd_opts.use_cuda: + ep_default = ExecutionProvider.CUDA + elif cmd_opts.use_rocm: + ep_default = ExecutionProvider.ROCm + elif cmd_opts.use_openvino: + ep_default = ExecutionProvider.OpenVINO + ep_checkbox = gr.Radio(label="Execution provider", value=ep_default, choices=ExecutionProvider) + ep_install = gr.Button(value="Reinstall") + ep_log = gr.HTML("") + ep_install.click(fn=install_execution_provider, inputs=[ep_checkbox], outputs=[ep_log]) + + if opts.cuda_compile_backend == "olive-ai": import olive.passes as olive_passes from olive.hardware.accelerator import AcceleratorSpec, Device - accelerator = AcceleratorSpec(accelerator_type=Device.GPU, execution_provider=opts.onnx_execution_provider) - with gr.Tabs(elem_id="tabs_olive"): - with gr.TabItem("Manage cache", id="manage_cache"): - cache_state_dirname = gr.Textbox(value=None, visible=False) - - with gr.Row(): - model_dropdown = gr.Dropdown(label="Model", value="Please select model", choices=checkpoint_tiles()) - create_refresh_button(model_dropdown, refresh_checkpoints, {}, "onnx_cache_refresh_diffusers_model") - - with gr.Row(): - def remove_cache_onnx_converted(dirname: str): - shutil.rmtree(os.path.join(opts.onnx_cached_models_path, dirname)) - print(f"ONNX converted cache of '{dirname}' is removed.") - - cache_onnx_converted = gr.Markdown("Please select model") - cache_remove_onnx_converted = gr.Button(value="Remove cache", visible=False) - cache_remove_onnx_converted.click(fn=remove_cache_onnx_converted, inputs=[cache_state_dirname,]) - - with gr.Column(): - cache_optimized_selected = gr.Textbox(value=None, visible=False) - - def select_cache_optimized(evt: gr.SelectData, data): - return ",".join(data[evt.index[0]]) - - def remove_cache_optimized(dirname: str, s: str): - if s == "": - return - size = s.split(",") - shutil.rmtree(os.path.join(opts.onnx_cached_models_path, 
f"{dirname}-{size[0]}w-{size[1]}h")) - print(f"Olive processed cache of '{dirname}' is removed: width={size[0]}, height={size[1]}") - - with gr.Row(): - cache_list_optimized_headers = ["height", "width"] - cache_list_optimized_types = ["str", "str"] - cache_list_optimized = gr.Dataframe(None, label="Optimized caches", show_label=True, overflow_row_behaviour='paginate', interactive=False, max_rows=10, headers=cache_list_optimized_headers, datatype=cache_list_optimized_types, type="array") - cache_list_optimized.select(fn=select_cache_optimized, inputs=[cache_list_optimized,], outputs=[cache_optimized_selected,]) - - cache_remove_optimized = gr.Button(value="Remove selected cache", visible=False) - cache_remove_optimized.click(fn=remove_cache_optimized, inputs=[cache_state_dirname, cache_optimized_selected,]) - - def cache_update_menus(query: str): - checkpoint_info = get_closet_checkpoint_match(query) - if checkpoint_info is None: - print(f"Could not find checkpoint object for '{query}'.") + with gr.TabItem("Manage cache", id="manage_cache"): + cache_state_dirname = gr.Textbox(value=None, visible=False) + with gr.Row(): + model_dropdown = gr.Dropdown(label="Model", value="Please select model", choices=checkpoint_tiles()) + create_refresh_button(model_dropdown, refresh_checkpoints, {}, "onnx_cache_refresh_diffusers_model") + with gr.Row(): + def remove_cache_onnx_converted(dirname: str): + shutil.rmtree(os.path.join(opts.onnx_cached_models_path, dirname)) + print(f"ONNX converted cache of '{dirname}' is removed.") + cache_onnx_converted = gr.Markdown("Please select model") + cache_remove_onnx_converted = gr.Button(value="Remove cache", visible=False) + cache_remove_onnx_converted.click(fn=remove_cache_onnx_converted, inputs=[cache_state_dirname,]) + with gr.Column(): + cache_optimized_selected = gr.Textbox(value=None, visible=False) + def select_cache_optimized(evt: gr.SelectData, data): + return ",".join(data[evt.index[0]]) + def remove_cache_optimized(dirname: str, s: str): + if s == "": return - model_name = os.path.basename(os.path.dirname(os.path.dirname(checkpoint_info.path)) if check_diffusers_cache(checkpoint_info.path) else checkpoint_info.path) - caches = os.listdir(opts.onnx_cached_models_path) - onnx_converted = False - optimized_sizes = [] - for cache in caches: - if cache == model_name: - onnx_converted = True - elif model_name in cache: - try: - splitted = cache.split("-") - height = splitted[-1][:-1] - width = splitted[-2][:-1] - optimized_sizes.append((width, height,)) - except Exception: - pass - return ( - model_name, - cache_onnx_converted.update(value="ONNX model cache of this model exists." 
if onnx_converted else "ONNX model cache of this model does not exist."), - cache_remove_onnx_converted.update(visible=onnx_converted), - None if len(optimized_sizes) == 0 else optimized_sizes, - cache_remove_optimized.update(visible=True), - ) - - model_dropdown.change(fn=cache_update_menus, inputs=[model_dropdown,], outputs=[ - cache_state_dirname, - cache_onnx_converted, cache_remove_onnx_converted, - cache_list_optimized, cache_remove_optimized, - ]) - - with gr.TabItem("Customize pass flow", id="pass_flow"): - with gr.Tabs(elem_id="tabs_model_type"): - with gr.TabItem("Stable Diffusion", id="sd"): - sd_config_path = os.path.join(sd_configs_path, "olive", "sd") - sd_submodels = os.listdir(sd_config_path) - sd_configs: Dict[str, Dict[str, Dict[str, Dict]]] = {} - sd_pass_config_components: Dict[str, Dict[str, Dict]] = {} - - with gr.Tabs(elem_id="tabs_sd_submodel"): - def sd_create_change_listener(*args): - def listener(v: Dict): - get_recursively(sd_configs, *args[:-1])[args[-1]] = v - return listener - - for submodel in sd_submodels: - config: Dict = None - - sd_pass_config_components[submodel] = {} - - with open(os.path.join(sd_config_path, submodel), "r", encoding="utf-8") as file: - config = json.load(file) - sd_configs[submodel] = config - - submodel_name = submodel[:-5] - with gr.TabItem(submodel_name, id=f"sd_{submodel_name}"): - pass_flows = DropdownMulti(label="Pass flow", value=sd_configs[submodel]["pass_flows"][0], choices=sd_configs[submodel]["passes"].keys()) - pass_flows.change(fn=sd_create_change_listener(submodel, "pass_flows", 0), inputs=pass_flows) - - with gr.Tabs(elem_id=f"tabs_sd_{submodel_name}_pass"): - for pass_name in sd_configs[submodel]["passes"]: - sd_pass_config_components[submodel][pass_name] = {} - - with gr.TabItem(pass_name, id=f"sd_{submodel_name}_pass_{pass_name}"): - config_dict = sd_configs[submodel]["passes"][pass_name] - - pass_type = gr.Dropdown(label="Type", value=config_dict["type"], choices=(x.__name__ for x in tuple(olive_passes.REGISTRY.values()))) - - - def create_pass_config_change_listener(submodel, pass_name, config_key): - def listener(value): - sd_configs[submodel]["passes"][pass_name]["config"][config_key] = value - return listener - - - for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access - component = None - - if v.type_ == bool: - component = gr.Checkbox - elif v.type_ == str: - component = gr.Textbox - elif v.type_ == int: - component = gr.Number - - if component is not None: - component = component(value=config_dict["config"][config_key] if config_key in config_dict["config"] else v.default_value, label=config_key) - sd_pass_config_components[submodel][pass_name][config_key] = component - component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component) - - pass_type.change(fn=sd_create_change_listener(submodel, "passes", config_key, "type"), inputs=pass_type) # pylint: disable=undefined-loop-variable - - def sd_save(): - for k, v in sd_configs.items(): - with open(os.path.join(sd_config_path, k), "w", encoding="utf-8") as file: - json.dump(v, file) - print("Olive: config for SD was saved.") - - sd_save_button = gr.Button(value="Save") - sd_save_button.click(fn=sd_save) - - with gr.TabItem("Stable Diffusion XL", id="sdxl"): - sdxl_config_path = os.path.join(sd_configs_path, "olive", "sdxl") - sdxl_submodels = os.listdir(sdxl_config_path) - sdxl_configs: Dict[str, Dict[str, Dict[str, 
Dict]]] = {} - sdxl_pass_config_components: Dict[str, Dict[str, Dict]] = {} - - with gr.Tabs(elem_id="tabs_sdxl_submodel"): - def sdxl_create_change_listener(*args): - def listener(v: Dict): - get_recursively(sdxl_configs, *args[:-1])[args[-1]] = v - return listener - - for submodel in sdxl_submodels: - config: Dict = None - - sdxl_pass_config_components[submodel] = {} - - with open(os.path.join(sdxl_config_path, submodel), "r", encoding="utf-8") as file: - config = json.load(file) - sdxl_configs[submodel] = config - - submodel_name = submodel[:-5] - with gr.TabItem(submodel_name, id=f"sdxl_{submodel_name}"): - pass_flows = DropdownMulti(label="Pass flow", value=sdxl_configs[submodel]["pass_flows"][0], choices=sdxl_configs[submodel]["passes"].keys()) - pass_flows.change(fn=sdxl_create_change_listener(submodel, "pass_flows", 0), inputs=pass_flows) - - with gr.Tabs(elem_id=f"tabs_sdxl_{submodel_name}_pass"): - for pass_name in sdxl_configs[submodel]["passes"]: - sdxl_pass_config_components[submodel][pass_name] = {} - - with gr.TabItem(pass_name, id=f"sdxl_{submodel_name}_pass_{pass_name}"): - config_dict = sdxl_configs[submodel]["passes"][pass_name] - - pass_type = gr.Dropdown(label="Type", value=sdxl_configs[submodel]["passes"][pass_name]["type"], choices=(x.__name__ for x in tuple(olive_passes.REGISTRY.values()))) - - - def create_pass_config_change_listener(submodel, pass_name, config_key): # pylint: disable=function-redefined - def listener(value): - sdxl_configs[submodel]["passes"][pass_name]["config"][config_key] = value - return listener - - - for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access - component = None - - if v.type_ == bool: - component = gr.Checkbox - elif v.type_ == str: - component = gr.Textbox - elif v.type_ == int: - component = gr.Number - - if component is not None: - component = component(value=config_dict["config"][config_key] if config_key in config_dict["config"] else v.default_value, label=config_key) - sdxl_pass_config_components[submodel][pass_name][config_key] = component - component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component) - - pass_type.change(fn=sdxl_create_change_listener(submodel, "passes", pass_name, "type"), inputs=pass_type) - - def sdxl_save(): - for k, v in sdxl_configs.items(): - with open(os.path.join(sdxl_config_path, k), "w", encoding="utf-8") as file: - json.dump(v, file) - print("Olive: config for SDXL was saved.") - - sdxl_save_button = gr.Button(value="Save") - sdxl_save_button.click(fn=sdxl_save) + size = s.split(",") + shutil.rmtree(os.path.join(opts.onnx_cached_models_path, f"{dirname}-{size[0]}w-{size[1]}h")) + print(f"Olive processed cache of '{dirname}' is removed: width={size[0]}, height={size[1]}") + with gr.Row(): + cache_list_optimized_headers = ["height", "width"] + cache_list_optimized_types = ["str", "str"] + cache_list_optimized = gr.Dataframe(None, label="Optimized caches", show_label=True, overflow_row_behaviour='paginate', interactive=False, max_rows=10, headers=cache_list_optimized_headers, datatype=cache_list_optimized_types, type="array") + cache_list_optimized.select(fn=select_cache_optimized, inputs=[cache_list_optimized,], outputs=[cache_optimized_selected,]) + cache_remove_optimized = gr.Button(value="Remove selected cache", visible=False) + cache_remove_optimized.click(fn=remove_cache_optimized, inputs=[cache_state_dirname, cache_optimized_selected,]) + + def 
cache_update_menus(query: str): + checkpoint_info = get_closet_checkpoint_match(query) + if checkpoint_info is None: + print(f"Could not find checkpoint object for '{query}'.") + return + model_name = os.path.basename(os.path.dirname(os.path.dirname(checkpoint_info.path)) if check_diffusers_cache(checkpoint_info.path) else checkpoint_info.path) + caches = os.listdir(opts.onnx_cached_models_path) + onnx_converted = False + optimized_sizes = [] + for cache in caches: + if cache == model_name: + onnx_converted = True + elif model_name in cache: + try: + splitted = cache.split("-") + height = splitted[-1][:-1] + width = splitted[-2][:-1] + optimized_sizes.append((width, height,)) + except Exception: + pass + return ( + model_name, + cache_onnx_converted.update(value="ONNX model cache of this model exists." if onnx_converted else "ONNX model cache of this model does not exist."), + cache_remove_onnx_converted.update(visible=onnx_converted), + None if len(optimized_sizes) == 0 else optimized_sizes, + cache_remove_optimized.update(visible=True), + ) + + model_dropdown.change(fn=cache_update_menus, inputs=[model_dropdown,], outputs=[ + cache_state_dirname, + cache_onnx_converted, cache_remove_onnx_converted, + cache_list_optimized, cache_remove_optimized, + ]) + + with gr.TabItem("Customize pass flow", id="pass_flow"): + with gr.Tabs(elem_id="tabs_model_type"): + with gr.TabItem("Stable Diffusion", id="sd"): + sd_config_path = os.path.join(sd_configs_path, "olive", "sd") + sd_submodels = os.listdir(sd_config_path) + sd_configs: Dict[str, Dict[str, Dict[str, Dict]]] = {} + sd_pass_config_components: Dict[str, Dict[str, Dict]] = {} + + with gr.Tabs(elem_id="tabs_sd_submodel"): + def sd_create_change_listener(*args): + def listener(v: Dict): + get_recursively(sd_configs, *args[:-1])[args[-1]] = v + return listener + + for submodel in sd_submodels: + config: Dict = None + sd_pass_config_components[submodel] = {} + with open(os.path.join(sd_config_path, submodel), "r", encoding="utf-8") as file: + config = json.load(file) + sd_configs[submodel] = config + + submodel_name = submodel[:-5] + with gr.TabItem(submodel_name, id=f"sd_{submodel_name}"): + pass_flows = DropdownMulti(label="Pass flow", value=sd_configs[submodel]["pass_flows"][0], choices=sd_configs[submodel]["passes"].keys()) + pass_flows.change(fn=sd_create_change_listener(submodel, "pass_flows", 0), inputs=pass_flows) + + with gr.Tabs(elem_id=f"tabs_sd_{submodel_name}_pass"): + for pass_name in sd_configs[submodel]["passes"]: + sd_pass_config_components[submodel][pass_name] = {} + + with gr.TabItem(pass_name, id=f"sd_{submodel_name}_pass_{pass_name}"): + config_dict = sd_configs[submodel]["passes"][pass_name] + pass_type = gr.Dropdown(label="Type", value=config_dict["type"], choices=(x.__name__ for x in tuple(olive_passes.REGISTRY.values()))) + + def create_pass_config_change_listener(submodel, pass_name, config_key): + def listener(value): + sd_configs[submodel]["passes"][pass_name]["config"][config_key] = value + return listener + + pass_cls = getattr(olive_passes, config_dict["type"], None) + default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access + for config_key, v in default_config.items(): + component = None + if v.type_ == bool: + component = gr.Checkbox + elif v.type_ == str: + component = gr.Textbox + elif v.type_ == int: + component = gr.Number + if component is not None: + component = component(value=config_dict["config"][config_key] if config_key in 
config_dict["config"] else v.default_value, label=config_key) + sd_pass_config_components[submodel][pass_name][config_key] = component + component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component) + + pass_type.change(fn=sd_create_change_listener(submodel, "passes", pass_name, "type"), inputs=pass_type) + + def sd_save(): + for k, v in sd_configs.items(): + with open(os.path.join(sd_config_path, k), "w", encoding="utf-8") as file: + json.dump(v, file) + print("Olive: config for SD was saved.") + + sd_save_button = gr.Button(value="Save") + sd_save_button.click(fn=sd_save) + + with gr.TabItem("Stable Diffusion XL", id="sdxl"): + sdxl_config_path = os.path.join(sd_configs_path, "olive", "sdxl") + sdxl_submodels = os.listdir(sdxl_config_path) + sdxl_configs: Dict[str, Dict[str, Dict[str, Dict]]] = {} + sdxl_pass_config_components: Dict[str, Dict[str, Dict]] = {} + + with gr.Tabs(elem_id="tabs_sdxl_submodel"): + def sdxl_create_change_listener(*args): + def listener(v: Dict): + get_recursively(sdxl_configs, *args[:-1])[args[-1]] = v + return listener + + for submodel in sdxl_submodels: + config: Dict = None + sdxl_pass_config_components[submodel] = {} + with open(os.path.join(sdxl_config_path, submodel), "r", encoding="utf-8") as file: + config = json.load(file) + sdxl_configs[submodel] = config + + submodel_name = submodel[:-5] + with gr.TabItem(submodel_name, id=f"sdxl_{submodel_name}"): + pass_flows = DropdownMulti(label="Pass flow", value=sdxl_configs[submodel]["pass_flows"][0], choices=sdxl_configs[submodel]["passes"].keys()) + pass_flows.change(fn=sdxl_create_change_listener(submodel, "pass_flows", 0), inputs=pass_flows) + + with gr.Tabs(elem_id=f"tabs_sdxl_{submodel_name}_pass"): + for pass_name in sdxl_configs[submodel]["passes"]: + sdxl_pass_config_components[submodel][pass_name] = {} + + with gr.TabItem(pass_name, id=f"sdxl_{submodel_name}_pass_{pass_name}"): + config_dict = sdxl_configs[submodel]["passes"][pass_name] + pass_type = gr.Dropdown(label="Type", value=sdxl_configs[submodel]["passes"][pass_name]["type"], choices=(x.__name__ for x in tuple(olive_passes.REGISTRY.values()))) + + def create_pass_config_change_listener(submodel, pass_name, config_key): # pylint: disable=function-redefined + def listener(value): + sdxl_configs[submodel]["passes"][pass_name]["config"][config_key] = value + return listener + + pass_cls = getattr(olive_passes, config_dict["type"], None) + default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access + for config_key, v in default_config.items(): + component = None + if v.type_ == bool: + component = gr.Checkbox + elif v.type_ == str: + component = gr.Textbox + elif v.type_ == int: + component = gr.Number + if component is not None: + component = component(value=config_dict["config"][config_key] if config_key in config_dict["config"] else v.default_value, label=config_key) + sdxl_pass_config_components[submodel][pass_name][config_key] = component + component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component) + pass_type.change(fn=sdxl_create_change_listener(submodel, "passes", pass_name, "type"), inputs=pass_type) + + def sdxl_save(): + for k, v in sdxl_configs.items(): + with open(os.path.join(sdxl_config_path, k), "w", encoding="utf-8") as file: + json.dump(v, file) + print("Olive: config for SDXL was saved.") + + sdxl_save_button = gr.Button(value="Save") + sdxl_save_button.click(fn=sdxl_save) return 
ui diff --git a/modules/onnx_impl/utils.py b/modules/onnx_impl/utils.py index 259a1af1fe9..54fd020ee37 100644 --- a/modules/onnx_impl/utils.py +++ b/modules/onnx_impl/utils.py @@ -38,8 +38,8 @@ def move_inference_session(session: ort.InferenceSession, device: torch.device): def check_diffusers_cache(path: os.PathLike): - #from modules.shared import opts - return False#opts.diffusers_dir in os.path.abspath(path) + from modules.shared import opts + return opts.diffusers_dir in os.path.abspath(path) def check_pipeline_sdxl(cls: Type[diffusers.DiffusionPipeline]) -> bool: diff --git a/modules/shared_init.py b/modules/shared_init.py index d3de19aad1e..4118daaab75 100644 --- a/modules/shared_init.py +++ b/modules/shared_init.py @@ -60,7 +60,7 @@ def initialize(): shared.mem_mon.start() if not cmd_opts.skip_ort: - from modules.onnx_impl import initialize as initialize_onnx + from modules.onnx_impl import initialize_onnx initialize_onnx() initialize_zluda()
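
Note on the olive-ai 0.5.x API this patch targets: execution providers are now declared on the accelerator entry under systems.local_system.config.accelerators instead of engine.execution_providers, the per-pass disable_search flag and the engine search_strategy block are dropped, the 0.4.x ONNXModel handler import is replaced by ONNXModelHandler, and the workflow is invoked directly through olive.workflows.run rather than the lazily loaded run_olive_workflow. A minimal sketch of that flow follows, assuming a DirectML build; the config path, the footprint file name, and the final-pass selection are illustrative rather than taken verbatim from this patch:

    import json
    from olive.model import ONNXModelHandler          # olive-ai 0.5.x name; 0.4.x exposed ONNXModel
    from olive.workflows import run as run_workflows

    with open("configs/olive/sd/unet.json", "r", encoding="utf-8") as config_file:
        olive_config = json.load(config_file)

    # Providers are configured per accelerator in 0.5.x, not on the engine section.
    accelerator = olive_config["systems"]["local_system"]["config"]["accelerators"][0]
    accelerator["device"] = "gpu"
    accelerator["execution_providers"] = ["DmlExecutionProvider"]
    olive_config["input_model"]["config"]["model_path"] = "/path/to/unet/model.onnx"  # illustrative path

    run_workflows(olive_config)

    # The optimized model is located through the footprints emitted by the run.
    with open("footprints/unet_gpu-dml_footprints.json", "r", encoding="utf-8") as footprint_file:
        footprints = json.load(footprint_file)
    final_pass = list(footprints.values())[-1]  # assumption: the last footprint entry is the final pass
    print(ONNXModelHandler(**final_pass["model_config"]["config"]).model_path)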