diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index d979dd8ce..3cb681e33 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -15,11 +15,22 @@ env:
   REGION_ID: cn-beijing
   ACR_ORG: registry.cn-beijing.aliyuncs.com/oneflow
   COMFYUI_SRC_DIR: ComfyUI
+  WEBUI_SRC_DIR: stable-diffusion-webui
+  WEBUI_DEPENDENCIES_SUBDIR: repos
   SDXL_BASE: /share_nfs/hf_models/sd_xl_base_1.0.safetensors
   UNET_INT8: /share_nfs/hf_models/unet_int8
   CONTROL_LORA_OPENPOSEXL2_RANK256: /share_nfs/hf_models/controlnet/control-lora-openposeXL2-rank256.safetensors
   SELENIUM_CONTAINER_NAME: selenium-test
   SELENIUM_IMAGE: standalone-chrome:119.0-chromedriver-119.0-grid-4.15.0-20231129
+
+  # For git repos required by webui
+  ASSETS_COMMIT_HASH: 6f7db241d2f8ba7457bac5ca9753331f0c266917
+  STABLE_DIFFUSION_COMMIT_HASH: cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf
+  STABLE_DIFFUSION_XL_COMMIT_HASH: 45c443b316737a4ab6e40413d7794a7f5657c19f
+  K_DIFFUSION_COMMIT_HASH: ab527a9a6d347f364e3d185ba6d714e22d80cb3c
+  BLIP_COMMIT_HASH: 48211a1594f1321b00f14c9f7a5b4813144b2fb9
+
+
 concurrency:
   group: sd-examples-${{ github.ref }}
   cancel-in-progress: true
@@ -31,6 +42,7 @@ jobs:
     outputs:
       onediff_src_url: ${{ steps.upload_to_oss.outputs.onediff_src_url }}
       comfy_src_url: ${{ steps.upload_to_oss.outputs.comfy_src_url }}
+      webui_src_url: ${{ steps.upload_to_oss.outputs.webui_src_url }}
    steps:
      - name: Setup ossutil
        run: |
@@ -46,6 +58,57 @@
        with:
          repository: comfyanonymous/ComfyUI
          path: ComfyUI
+     - name: Checkout Stable Diffusion WebUI
+       uses: actions/checkout@v4
+       with:
+         repository: AUTOMATIC1111/stable-diffusion-webui
+         path: ${{ env.WEBUI_SRC_DIR }}
+
+     # -------- The following are the dependencies required by webui --------
+     - name: Checkout CLIP (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: openai/CLIP
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP
+         ref: d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
+     - name: Checkout open clip (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: mlfoundations/open_clip
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip
+         ref: bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b
+     - name: Checkout ${{ env.WEBUI_SRC_DIR }}-assets (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: AUTOMATIC1111/stable-diffusion-webui-assets
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets
+         ref: ${{ env.ASSETS_COMMIT_HASH }}
+     - name: Checkout stablediffusion (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: Stability-AI/stablediffusion
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai
+         ref: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }}
+     - name: Checkout generative-models (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: Stability-AI/generative-models
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models
+         ref: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }}
+     - name: Checkout k-diffusion (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: crowsonkb/k-diffusion
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion
+         ref: ${{ env.K_DIFFUSION_COMMIT_HASH }}
+     - name: Checkout BLIP (dependency of webui)
+       uses: actions/checkout@v4
+       with:
+         repository: salesforce/BLIP
+         path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP
+         ref: ${{ env.BLIP_COMMIT_HASH }}
+     # -------- The above are the dependencies required by webui --------
      - name: Pack src
        working-directory: onediff
        run: |
@@ -58,15 +121,24 @@
          git reset --hard
          git clean -f
          git archive --prefix ${{ env.COMFYUI_SRC_DIR }}/ --format zip HEAD > comfyui-src.zip
+     - name: Pack webui
+       working-directory: ${{ env.WEBUI_SRC_DIR }}
+       run: |
+         git reset --hard
+         git clean -f
+         zip -r webui-src.zip .
      - name: Upload src
        id: upload_to_oss
        run: |
          ONEDIFF_DST="oss://gh-src-cache/onediff/${{ github.sha }}/onediff-src.zip"
          COMFY_DST="oss://gh-src-cache/onediff/${{ github.sha }}/comfyui-src.zip"
+         WEBUI_DST="oss://gh-src-cache/onediff/${{ github.sha }}/webui-src.zip"
          ./ossutil64 cp --disable-ignore-error --update onediff/onediff-src.zip ${ONEDIFF_DST}
          ./ossutil64 cp --disable-ignore-error --update ComfyUI/comfyui-src.zip ${COMFY_DST}
+         ./ossutil64 cp --disable-ignore-error --update ${{ env.WEBUI_SRC_DIR }}/webui-src.zip ${WEBUI_DST}
          echo "onediff_src_url=${ONEDIFF_DST}" >> $GITHUB_OUTPUT
          echo "comfy_src_url=${COMFY_DST}" >> $GITHUB_OUTPUT
+         echo "webui_src_url=${WEBUI_DST}" >> $GITHUB_OUTPUT
  run-examples:
    name: " ${{ matrix.test-suite }} ${{ matrix.image }}"
    runs-on: [self-hosted, cuda]
@@ -81,6 +153,7 @@
        test-suite:
          - diffusers_examples
          - comfy
+         - webui
    steps:
      - name: Login to ACR with the AccessKey pair
        uses: aliyun/acr-login@v1
@@ -110,6 +183,11 @@
        run: |
          $HOME/ossutil64 cp ${{ needs.upload_src.outputs.comfy_src_url }} .
          unzip -o $(basename ${{ needs.upload_src.outputs.comfy_src_url }})
+     - name: Checkout WebUI via OSS
+       if: matrix.test-suite == 'webui' && github.repository == 'siliconflow/onediff'
+       run: |
+         $HOME/ossutil64 cp ${{ needs.upload_src.outputs.webui_src_url }} .
+         unzip -o $(basename ${{ needs.upload_src.outputs.webui_src_url }}) -d ${{ env.WEBUI_SRC_DIR }}
      - name: Checkout
        if: github.repository != 'siliconflow/onediff'
        uses: actions/checkout@v4
@@ -119,6 +197,12 @@
        with:
          repository: comfyanonymous/ComfyUI
          path: ${{ env.COMFYUI_SRC_DIR }}
+     - name: Checkout Stable Diffusion WebUI
+       if: matrix.test-suite == 'webui' && github.repository != 'siliconflow/onediff'
+       uses: actions/checkout@v4
+       with:
+         repository: AUTOMATIC1111/stable-diffusion-webui
+         path: ${{ env.WEBUI_SRC_DIR }}
      - name: Clean docker containers
        run: |
          docker rm -f ${{ env.CONTAINER_NAME }} || true
@@ -158,6 +242,25 @@
          SDXL_BASE: ${{ env.SDXL_BASE }}
          UNET_INT8: ${{ env.UNET_INT8 }}
          SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }}
+
+     - name: Setup docker for WebUI Test
+       if: matrix.test-suite == 'webui'
+       run: |
+         env
+         docker compose -f tests/webui-docker-compose.yml up -d
+       env:
+         CONTAINER_NAME: ${{ env.CONTAINER_NAME }}
+         MATRIX_IMAGE: ${{ matrix.image }}
+         WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }}
+         WEBUI_DEPENDENCIES_SUBDIR: ${{ env.WEBUI_DEPENDENCIES_SUBDIR }}
+         SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }}
+         SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }}
+         SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }}
+         ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }}
+         STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }}
+         STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }}
+         K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }}
+         BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }}
      - run: nvidia-smi
      - run: nvidia-smi -L
@@ -197,6 +300,7 @@
          run_comfy_test "workflows/sdxl-unet-speedup-graph-saver.json" 200
          run_comfy_test "workflows/sdxl-control-lora-speedup.json" 200
+         run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/ipadapter_advanced.json" 200
          run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache.json" 600
          run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache-with-lora.json" 800
          # run_comfy_test "workflows/text-to-video-speedup.json" 5000
@@ -234,7 +338,9 @@
      - if: matrix.test-suite == 'diffusers_examples'
        run: docker exec -w /src/onediff/onediff_diffusers_extensions ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_turbo.py --compile true --base /share_nfs/hf_models/sdxl-turbo
      - if: matrix.test-suite == 'diffusers_examples'
-       run: docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py
+       run: |
+         docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install scikit-image -i https://pypi.tuna.tsinghua.edu.cn/simple
+         docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py
      # - if: matrix.test-suite == 'diffusers_examples'
      #   run: docker exec -w /src/onediff/onediff_diffusers_extensions -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_reuse_pipe.py --base /share_nfs/hf_models/stable-diffusion-xl-base-1.0 --new_base /share_nfs/hf_models/dataautogpt3-OpenDalleV1.1
      - if: matrix.test-suite == 'diffusers_examples' && startsWith(matrix.image, 'onediff-pro')
        run: |
          docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py --model /share_nfs/hf_models/stable-diffusion-v1-5-int8 --width 512 --height 512 --saved_image /src/onediff/output_enterprise_sd.png
          docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 tests/test_quantitative_quality.py
@@ -242,6 +348,67 @@
+     - name: Install Requirements for WebUI
+       if: matrix.test-suite == 'webui'
+       run: |
+         docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+         docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
+         docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install pytorch-lightning gradio==3.41.2 diskcache gitpython pytorch_lightning==1.9.4 scikit-image jsonmerge pillow-avif-plugin torchdiffeq torchsde clean-fid resize-right lark tomesd blendmodes facexlib opencv-python==4.8.0.74 piexif inflection ftfy regex tqdm pydantic==1.10.13
+
+     - name: Prepare environment for WebUI
+       if: matrix.test-suite == 'webui'
+       run: |
+         # hack code to print error msg for debugging
+         # docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sed -i '/except RuntimeError:/c\    except RuntimeError as e:\n        print(f"Error occurred while running git command: {e}")' modules/launch_utils.py
+         docker exec -d ${{ env.CONTAINER_NAME }} mkdir /app/${{ env.WEBUI_SRC_DIR }}/.git /app/${{ env.WEBUI_SRC_DIR }}/openai
+         docker exec -d ${{ env.CONTAINER_NAME }} ln -s /share_nfs/onediff_ci/sd-webui/models/clips/clip-vit-large-patch14 /app/${{ env.WEBUI_SRC_DIR }}/openai/clip-vit-large-patch14
+         docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory /app/${{ env.WEBUI_SRC_DIR }}
+         for dir in $SAFE_DIRECTORIES; do
+           docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory /app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir
+           echo /app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir
+         done
+         docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -e venv_dir=- ${{ env.CONTAINER_NAME }} sh -c "bash webui.sh -f --exit --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models"
+
+       # env:
+       #   INDEX_URL: "https://pypi.tuna.tsinghua.edu.cn/simple"
+       #   CLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP"
+       #   OPENCLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip"
+       #   ASSETS_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets"
+       #   STABLE_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai"
+       #   STABLE_DIFFUSION_XL_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models"
+       #   K_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion"
+       #   BLIP_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP"
+
+       #   ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }}
+       #   STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }}
+       #   STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }}
+       #   K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }}
+       #   BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }}
+
+       #   SAFE_DIRECTORIES: |
+       #     CLIP
+       #     open_clip
+       #     stable-diffusion-webui-assets
+       #     stable-diffusion-stability-ai
+       #     generative-models
+       #     k-diffusion
+       #     BLIP
+
+     - name: Start WebUI Web Service
+       if: matrix.test-suite == 'webui'
+       run: |
+         docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sh -c "python3 webui.py --port 7860 --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models --skip-version-check > /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log 2>&1"
+         sleep 60
+
+     - run: docker exec ${{ env.CONTAINER_NAME }} ps aux
+
+     - if: matrix.test-suite == 'webui'
+       run: docker exec ${{ env.CONTAINER_NAME }} python3 -m pytest -v -s tests/sd-webui/test_api.py
+
+     - name: Show WebUI Log
+       if: matrix.test-suite == 'webui'
+       run: docker exec ${{ env.CONTAINER_NAME }} cat /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log
+
      - name: Shutdown docker for ComfyUI Test
        if: matrix.test-suite == 'comfy'
        run: |
@@ -268,3 +435,16 @@
          SDXL_BASE: ${{ env.SDXL_BASE }}
          UNET_INT8: ${{ env.UNET_INT8 }}
          SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }}
+
+     - name: Shutdown docker for WebUI Test
+       if: matrix.test-suite == 'webui'
+       run: |
+         docker compose -f tests/webui-docker-compose.yml down
+       env:
+         CONTAINER_NAME: ${{ env.CONTAINER_NAME }}
+         ACR_ORG: ${{ env.ACR_ORG }}
+         MATRIX_IMAGE: ${{ matrix.image }}
+         WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }}
+         SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }}
+         SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }}
+         SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }}
diff --git a/.gitignore b/.gitignore
index e68a35b10..677317453 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,3 +177,6 @@ unet_graphs
 # onediff_comfy_nodes
 *.pt
 *.graph
+
+# onediff_sd_webui_extensions
+onediff_sd_webui_extensions/compiled_caches/
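The "Start WebUI Web Service" step above launches the server detached and then waits a fixed 60 seconds before `tests/sd-webui/test_api.py` runs. A minimal sketch of what a readiness poll could look like instead, assuming the AUTOMATIC1111 API is reachable on localhost:7860; the exact probe endpoint is an assumption, any cheap GET route works:

```python
# Poll the WebUI HTTP API until it answers, instead of a fixed sleep.
import time
import urllib.error
import urllib.request

def wait_for_webui(base_url="http://127.0.0.1:7860", timeout=300):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # /sdapi/v1/options is assumed here as a cheap probe endpoint
            with urllib.request.urlopen(f"{base_url}/sdapi/v1/options", timeout=5):
                return  # server answered, API is up
        except (urllib.error.URLError, OSError):
            time.sleep(2)  # not up yet, retry
    raise TimeoutError(f"WebUI did not become ready within {timeout}s")
```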
diff --git a/benchmarks/image_to_video.py b/benchmarks/image_to_video.py
index 730ec752b..fcc9e19cd 100644
--- a/benchmarks/image_to_video.py
+++ b/benchmarks/image_to_video.py
@@ -41,7 +41,7 @@
 import oneflow as flow
 import torch
 
-from onediffx import compile_pipe, compile_options
+from onediffx import compile_pipe, OneflowCompileOptions
 from diffusers.utils import load_image, export_to_video
 
@@ -189,7 +189,8 @@ def main():
     # especially for 40xx series cards.
     # So here by partially disabling the half accumulation in MHA partially,
     # we can get a good balance.
-    compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = (
+    compile_options = OneflowCompileOptions()
+    compile_options.attention_allow_half_precision_score_accumulation_max_m = (
         args.attention_fp16_score_accum_max_m
     )
     pipe = compile_pipe(pipe, options=compile_options)
diff --git a/benchmarks/patch_stable_cascade_of.py b/benchmarks/patch_stable_cascade_of.py
index 8f388111a..454a17344 100644
--- a/benchmarks/patch_stable_cascade_of.py
+++ b/benchmarks/patch_stable_cascade_of.py
@@ -5,7 +5,7 @@
 from packaging import version
 import importlib.metadata
 
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 diffusers_of = transform_mgr.transform_package("diffusers")
 StableCascadeUnet_OF_CLS = (
@@ -120,7 +120,7 @@ def forward(
 )
 
 # torch2oflow_class_map.update({StableCascadeUnet: StableCascadeUnetOflow})
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 
 from contextlib import contextmanager
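Across this patch the old shared, mutable `compile_options` global (with its `.oneflow.*` sub-namespace) is replaced by a per-call `OneflowCompileOptions` instance whose fields are set directly. A minimal sketch of the new pattern, using only names that appear in the hunks; the toy module stands in for `pipe.unet` and friends:

```python
# New options pattern after this PR: build a fresh OneflowCompileOptions
# and set fields directly, instead of mutating `compile_options.oneflow.*`.
import torch
from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile

options = OneflowCompileOptions()
options.use_graph = True            # compile to a static OneFlow graph
options.graph_file = "toy.graph"    # cache the compiled graph on disk
options.graph_file_device = "cuda"  # device the cached graph targets

toy = torch.nn.Linear(8, 8).to("cuda")  # stand-in for pipe.unet etc.
compiled = oneflow_compile(toy, options=options)
```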
diff --git a/benchmarks/run_text_to_image_benchmark.sh b/benchmarks/run_text_to_image_benchmark.sh
index 273a419b9..512aab42f 100755
--- a/benchmarks/run_text_to_image_benchmark.sh
+++ b/benchmarks/run_text_to_image_benchmark.sh
@@ -111,6 +111,22 @@ benchmark_sd_model sd15 ${SD15_MODEL_PATH} 1024x1024,720x1280,768x768,512x512
 benchmark_sd_model sd21 ${SD21_MODEL_PATH} 1024x1024,720x1280,768x768,512x512
 benchmark_sd_model sdxl ${SDXL_MODEL_PATH} 1024x1024,720x1280,768x768,512x512
 
+benchmark_sd_model_with_throughput() {
+  model_path=$1
+  warmups=$2
+  compiler=$3
+  echo "Run ${model_path} with throughput test at 1024x1024..."
+  script_output=$(python3 ${SCRIPT_DIR}/text_to_image.py --model ${model_path} --variant fp16 --warmups ${warmups} --compiler ${compiler} --height 1024 --width 1024 --throughput | tee /dev/tty)
+
+  throughput=$(echo "${script_output}" | grep -oP '(?<=Throughput without base cost: )\d+\.\d+')
+  inference_time_eq=$(echo "${script_output}" | grep -oP 'Model: Inference Time = .+')
+
+  BENCHMARK_RESULT_TEXT="${BENCHMARK_RESULT_TEXT}| ${model_path} | 1024x1024 | N/A | N/A | N/A | N/A | Throughput without base cost: ${throughput} | ${inference_time_eq} |\n"
+}
+
+benchmark_sd_model_with_throughput ${SD15_MODEL_PATH} ${WARMUPS} ${COMPILER}
+benchmark_sd_model_with_throughput ${SDXL_MODEL_PATH} ${WARMUPS} ${COMPILER}
+
 if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ x"${COMPILER}" == x"oneflow" ]; then
   benchmark_sd_model sdxl_quant ${SDXL_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512
 fi
@@ -119,4 +135,5 @@ if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ ${BENCHMARK_DEEP_CACHE_MODEL} != 0 ] &
   benchmark_sd_model sdxl_deepcache_quant ${SDXL_DEEP_CACHE_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512
 fi
 
+echo -e "\nBenchmark Results:"
 echo -e "${BENCHMARK_RESULT_TEXT}" > ${OUTPUT_FILE}
diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py
index a8c97c510..539e73f74 100644
--- a/benchmarks/text_to_image.py
+++ b/benchmarks/text_to_image.py
@@ -6,9 +6,9 @@
 CONTROLNET = None
 STEPS = 30
 PROMPT = "best quality, realistic, unreal engine, 4K, a beautiful girl"
-NEGATIVE_PROMPT = None
-SEED = None
-WARMUPS = 3
+NEGATIVE_PROMPT = ""
+SEED = 333
+WARMUPS = 1
 BATCH = 1
 HEIGHT = None
 WIDTH = None
@@ -19,6 +19,8 @@
 CACHE_INTERVAL = 3
 CACHE_LAYER_ID = 0
 CACHE_BLOCK_ID = 0
+COMPILER = "oneflow"
+COMPILER_CONFIG = None
 
 import os
 import importlib
@@ -27,6 +29,8 @@
 import time
 import json
 import torch
+import matplotlib.pyplot as plt
+import numpy as np
 from PIL import Image, ImageDraw
 from diffusers.utils import load_image
 
@@ -56,20 +60,30 @@ def parse_args():
     parser.add_argument("--input-image", type=str, default=INPUT_IMAGE)
     parser.add_argument("--control-image", type=str, default=CONTROL_IMAGE)
     parser.add_argument("--output-image", type=str, default=OUTPUT_IMAGE)
+    parser.add_argument("--throughput", action="store_true")
     parser.add_argument("--deepcache", action="store_true")
     parser.add_argument(
         "--compiler",
         type=str,
-        default="oneflow",
+        default=COMPILER,
         choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"],
     )
+    parser.add_argument(
+        "--compiler-config",
+        type=str,
+        default=COMPILER_CONFIG,
+    )
     return parser.parse_args()
 
 
+args = parse_args()
+
+
 def load_pipe(
     pipeline_cls,
     model_name,
     variant=None,
+    dtype=torch.float16,
+    device="cuda",
     custom_pipeline=None,
     scheduler=None,
     lora=None,
@@ -80,31 +94,34 @@
         extra_kwargs["custom_pipeline"] = custom_pipeline
     if variant is not None:
         extra_kwargs["variant"] = variant
+    if dtype is not None:
+        extra_kwargs["torch_dtype"] = dtype
     if controlnet is not None:
         from diffusers import ControlNetModel
 
         controlnet = ControlNetModel.from_pretrained(
-            controlnet, torch_dtype=torch.float16,
+            controlnet, torch_dtype=dtype,
         )
         extra_kwargs["controlnet"] = controlnet
     if os.path.exists(os.path.join(model_name, "calibrate_info.txt")):
         from onediff.quantization import QuantPipeline
 
         pipe = QuantPipeline.from_quantized(
-            pipeline_cls, model_name, torch_dtype=torch.float16, **extra_kwargs
+            pipeline_cls, model_name, **extra_kwargs
         )
     else:
         pipe = pipeline_cls.from_pretrained(
-            model_name, torch_dtype=torch.float16, **extra_kwargs
+            model_name, **extra_kwargs
         )
-    if scheduler is not None:
+    if scheduler is not None and scheduler != "none":
         scheduler_cls = getattr(importlib.import_module("diffusers"), scheduler)
         pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
     if lora is not None:
         pipe.load_lora_weights(lora)
         pipe.fuse_lora()
     pipe.safety_checker = None
-    pipe.to(torch.device("cuda"))
+    if device is not None:
+        pipe.to(torch.device(device))
     return pipe
@@ -134,8 +151,52 @@ def callback_on_step_end(self, pipe, i, t, callback_kwargs={}):
         return callback_kwargs
 
 
+def calculate_inference_time_and_throughput(height, width, n_steps, model):
+    start_time = time.time()
+    model(prompt=args.prompt, height=height, width=width, num_inference_steps=n_steps)
+    end_time = time.time()
+    inference_time = end_time - start_time
+    # pixels_processed = height * width * n_steps
+    # throughput = pixels_processed / inference_time
+    throughput = n_steps / inference_time
+    return inference_time, throughput
+
+
+def generate_data_and_fit_model(model, steps_range):
+    height, width = 1024, 1024
+    data = {"steps": [], "inference_time": [], "throughput": []}
+
+    for n_steps in steps_range:
+        inference_time, throughput = calculate_inference_time_and_throughput(height, width, n_steps, model)
+        data["steps"].append(n_steps)
+        data["inference_time"].append(inference_time)
+        data["throughput"].append(throughput)
+        print(f"Steps: {n_steps}, Inference Time: {inference_time:.2f} seconds, Throughput: {throughput:.2f} steps/s")
+
+    average_throughput = np.mean(data["throughput"])
+    print(f"Average Throughput: {average_throughput:.2f} steps/s")
+
+    coefficients = np.polyfit(data["steps"], data["inference_time"], 1)
+    base_time_without_base_cost = 1 / coefficients[0]
+    print(f"Throughput without base cost: {base_time_without_base_cost:.2f} steps/s")
+    return data, coefficients
+
+
+def plot_data_and_model(data, coefficients):
+    plt.figure(figsize=(10, 5))
+    plt.scatter(data["steps"], data["inference_time"], color='blue')
+    plt.plot(data["steps"], np.polyval(coefficients, data["steps"]), color='red')
+    plt.title("Inference Time vs. Steps")
+    plt.xlabel("Steps")
+    plt.ylabel("Inference Time (seconds)")
+    plt.grid(True)
+    # plt.savefig("output.png")
+    plt.show()
+
+    print(f"Model: Inference Time = {coefficients[0]:.2f} * Steps + {coefficients[1]:.2f}")
+
+
 def main():
-    args = parse_args()
     if args.input_image is None:
         if args.deepcache:
             from onediffx.deep_cache import StableDiffusionXLPipeline as pipeline_cls
@@ -154,18 +215,32 @@ def main():
         controlnet=args.controlnet,
     )
 
-    height = args.height or pipe.unet.config.sample_size * pipe.vae_scale_factor
-    width = args.width or pipe.unet.config.sample_size * pipe.vae_scale_factor
+    core_net = None
+    if core_net is None:
+        core_net = getattr(pipe, "unet", None)
+    if core_net is None:
+        core_net = getattr(pipe, "transformer", None)
+    height = args.height or core_net.config.sample_size * pipe.vae_scale_factor
+    width = args.width or core_net.config.sample_size * pipe.vae_scale_factor
 
     if args.compiler == "none":
         pass
     elif args.compiler == "oneflow":
         pipe = compile_pipe(pipe)
     elif args.compiler == "nexfort":
-        pipe = compile_pipe(pipe, backend="nexfort")
+        if args.compiler_config is not None:
+            # config with dict
+            options = json.loads(args.compiler_config)
+        else:
+            # config with string
+            options = '{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}'
+        pipe = compile_pipe(pipe, backend="nexfort", options=options, fuse_qkv_projections=True)
     elif args.compiler in ("compile", "compile-max-autotune"):
         mode = "max-autotune" if args.compiler == "compile-max-autotune" else None
-        pipe.unet = torch.compile(pipe.unet, mode=mode)
+        if hasattr(pipe, "unet"):
+            pipe.unet = torch.compile(pipe.unet, mode=mode)
+        if hasattr(pipe, "transformer"):
+            pipe.transformer = torch.compile(pipe.transformer, mode=mode)
         if hasattr(pipe, "controlnet"):
             pipe.controlnet = torch.compile(pipe.controlnet, mode=mode)
         pipe.vae = torch.compile(pipe.vae, mode=mode)
@@ -199,7 +274,6 @@ def get_kwarg_inputs():
         negative_prompt=args.negative_prompt,
         height=height,
         width=width,
-        num_inference_steps=args.steps,
         num_images_per_prompt=args.batch,
         generator=None
         if args.seed is None
@@ -210,6 +284,8 @@ def get_kwarg_inputs():
             else json.loads(args.extra_call_kwargs)
         ),
     )
+    if args.steps is not None:
+        kwarg_inputs["num_inference_steps"] = args.steps
     if input_image is not None:
         kwarg_inputs["image"] = input_image
     if control_image is not None:
@@ -227,10 +303,15 @@ def get_kwarg_inputs():
     # The initial calls will trigger compilation and might be very slow.
     # After that, it should be very fast.
     if args.warmups > 0:
+        begin = time.time()
+        print("=======================================")
         print("Begin warmup")
         for _ in range(args.warmups):
             pipe(**get_kwarg_inputs())
+        end = time.time()
         print("End warmup")
+        print(f"Warmup time: {end - begin:.3f}s")
+        print("=======================================")
 
     # Let"s see it!
     # Note: Progress bar might work incorrectly due to the async nature of CUDA.
@@ -255,7 +336,7 @@
         cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024
     else:
         cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3)
-    print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB")
+    print(f"Max used CUDA memory: {cuda_mem_after_used:.3f}GiB")
     print("=======================================")
 
     if args.output_image is not None:
@@ -263,6 +344,11 @@
     else:
         print("Please set `--output-image` to save the output image")
 
+    if args.throughput:
+        steps_range = range(1, 100, 1)
+        data, coefficients = generate_data_and_fit_model(pipe, steps_range)
+        plot_data_and_model(data, coefficients)
+
 
 if __name__ == "__main__":
     main()
diff --git a/onediff_comfy_nodes/extras_nodes/nodes_compare.py b/onediff_comfy_nodes/extras_nodes/nodes_compare.py
index 4f4461d9b..a06fa9edb 100644
--- a/onediff_comfy_nodes/extras_nodes/nodes_compare.py
+++ b/onediff_comfy_nodes/extras_nodes/nodes_compare.py
@@ -5,7 +5,7 @@
 import folder_paths
 import numpy as np
 import oneflow as flow
-from onediff.infer_compiler.transform.builtin_transform import torch2oflow
+from onediff.infer_compiler.backends.oneflow.transform.builtin_transform import torch2oflow
 from PIL import Image
 
 try:
@@ -148,8 +148,9 @@ def save_images(
         )
         results = list()
         for image1, image2 in zip(images1, images2):
+            # image diff
-            image = image1 - image2
+            image = image1.cuda() - image2.cuda()
 
             i = 255.0 * image.cpu().numpy()
             img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
diff --git a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py
index ca22873ee..9daa567e6 100644
--- a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py
+++ b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py
@@ -7,7 +7,7 @@
 from comfy import model_management
 from comfy.cli_args import args
 
-from onediff.infer_compiler.utils import is_community_version
+from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version
 from ..modules.oneflow.config import ONEDIFF_QUANTIZED_OPTIMIZED_MODELS
 from ..modules.oneflow.hijack_animatediff import animatediff_hijacker
@@ -17,6 +17,7 @@
 from ..modules.oneflow.hijack_samplers import samplers_hijack
 from ..modules.oneflow.hijack_comfyui_instantid import comfyui_instantid_hijacker
 from ..modules.oneflow.hijack_model_patcher import model_patch_hijacker
+from ..modules.oneflow.hijack_utils import comfy_utils_hijack
 from ..modules.oneflow import BasicOneFlowBoosterExecutor
 from ..modules.oneflow import DeepcacheBoosterExecutor
 from ..modules.oneflow import PatchBoosterExecutor
@@ -35,6 +36,7 @@
 ipadapter_plus_hijacker.hijack()
 comfyui_instantid_hijacker.hijack()
 model_patch_hijacker.hijack()
+comfy_utils_hijack.hijack()
 
 import comfy_extras.nodes_video_model
 from nodes import CheckpointLoaderSimple
diff --git a/onediff_comfy_nodes/modules/oneflow/booster_basic.py b/onediff_comfy_nodes/modules/oneflow/booster_basic.py
index 608462861..f35d4f27d 100644
--- a/onediff_comfy_nodes/modules/oneflow/booster_basic.py
+++ b/onediff_comfy_nodes/modules/oneflow/booster_basic.py
@@ -7,7 +7,7 @@
 from comfy.model_patcher import ModelPatcher
 from comfy.sd import VAE
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule
+from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule
 
 from ..booster_interface import BoosterExecutor
 from .onediff_controlnet import OneDiffControlLora
@@ -47,6 +47,7 @@ def _(self, model: ModelPatcher, ckpt_name: Optional[str] = None, **kwargs):
         )
         set_compiled_options(compiled_model, graph_file)
 
+        return model
 
     @execute.register(ControlNet)
diff --git a/onediff_comfy_nodes/modules/oneflow/booster_patch.py b/onediff_comfy_nodes/modules/oneflow/booster_patch.py
index 6bff76ba9..b12e1a042 100644
--- a/onediff_comfy_nodes/modules/oneflow/booster_patch.py
+++ b/onediff_comfy_nodes/modules/oneflow/booster_patch.py
@@ -2,7 +2,7 @@
 from functools import singledispatchmethod
 
 from comfy.model_patcher import ModelPatcher
-from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule
+from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule
 
 from ..booster_interface import BoosterExecutor
diff --git a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py
index 7254ae0b3..f4b50d6e4 100644
--- a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py
+++ b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py
@@ -8,7 +8,7 @@
 from comfy.controlnet import ControlNet
 from comfy.model_patcher import ModelPatcher
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule
+from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule
 from onediff_quant.quantization import QuantizationConfig
 from onediff_quant.quantization.module_operations import get_sub_module
 from onediff_quant.quantization.quantize_calibrators import (
diff --git a/onediff_comfy_nodes/modules/oneflow/config.py b/onediff_comfy_nodes/modules/oneflow/config.py
index 353c4f024..8a6494e31 100644
--- a/onediff_comfy_nodes/modules/oneflow/config.py
+++ b/onediff_comfy_nodes/modules/oneflow/config.py
@@ -3,7 +3,7 @@
 import sys
 from pathlib import Path
 
-from onediff.infer_compiler.utils import is_community_version
+from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version
 
 # Set up paths
 ONEDIFF_QUANTIZED_OPTIMIZED_MODELS = "onediff_quant"
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py
index 167789792..d6340640f 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py
@@ -4,8 +4,8 @@
 """
 import os
 
-from onediff.infer_compiler.import_tools import DynamicModuleLoader
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 from ...sd_hijack_utils import Hijacker
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py
index 74f8dd9c4..3bbc579dc 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py
@@ -1,7 +1,7 @@
 # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/motion_module_ad.py
 import oneflow as torch
 from einops import repeat
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 
 from ._config import animatediff_of, animatediff_pt
@@ -124,7 +124,7 @@ def forward(
 )
 
 # import torch as torch_pt
-# from onediff.infer_compiler.transform import torch2oflow
+# from onediff.infer_compiler.backends.oneflow.transform import torch2oflow
 
 # @torch2oflow.register(TemporalTransformer3DModel_PT_CLS)
 # def _(mod, verbose=False):
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py
index 720c5ab2a..ea201069b 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py
@@ -2,7 +2,7 @@
 import oneflow as flow
 from einops import rearrange
 from onediff.infer_compiler import DeployableModule
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 from oneflow.nn.functional import group_norm
 
 from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py
index d1b4f3885..1fafec133 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py
@@ -1,6 +1,6 @@
 # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/utils_motion.py
 import oneflow as torch
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 
 from ._config import animatediff_of, animatediff_pt
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py
index ec2a1903e..d18438434 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py
@@ -2,8 +2,8 @@
 import traceback
 
 COMFYUI_ROOT = os.getenv("COMFYUI_ROOT")
-from onediff.infer_compiler.import_tools import DynamicModuleLoader
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 from ...sd_hijack_utils import Hijacker
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py
index 37d11f083..b9da6376b 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py
@@ -2,8 +2,8 @@
 import traceback
 
 COMFYUI_ROOT = os.getenv("COMFYUI_ROOT")
-from onediff.infer_compiler.import_tools import DynamicModuleLoader
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 from ...sd_hijack_utils import Hijacker
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py
index 9be19105d..588fe7971 100644
--- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py
@@ -1,11 +1,11 @@
+import torch
 from register_comfy.CrossAttentionPatch import Attn2Replace, ipadapter_attention
 
-from onediff.infer_compiler.transform import torch2oflow
+from comfy import model_management
+from onediff.infer_compiler.backends.oneflow.transform import torch2oflow
 from ..utils.booster_utils import clear_deployable_module_cache_and_unbind
 from ..patch_management import PatchType, create_patch_executor
 
-# from onediff.infer_compiler.utils.cost_util import cost_time
-# @cost_time(debug=True, message="set_model_patch_replace_v2")
 def set_model_patch_replace_v2(org_fn, model, patch_kwargs, key):
     diff_model = model.model.diffusion_model
     cache_patch_executor = create_patch_executor(PatchType.CachedCrossAttentionPatch)
@@ -41,6 +41,12 @@ def split_patch_kwargs(patch_kwargs):
         else:
             split2dict[k] = v
 
+    # patch for weight
+    weight = split1dict["weight"]
+    if isinstance(weight, (int, float)):
+        weight = torch.tensor([weight])
+    split1dict["weight"] = weight.to(model_management.get_torch_device())
+
     return split1dict, split2dict
 
 new_patch_kwargs, patch_kwargs = split_patch_kwargs(patch_kwargs)
diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_utils.py b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py
new file mode 100644
index 000000000..4a4f25c5a
--- /dev/null
+++ b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py
@@ -0,0 +1,28 @@
+"""hijack ComfyUI/comfy/utils.py"""
+import torch
+from comfy.utils import copy_to_param
+from ..sd_hijack_utils import Hijacker
+
+
+def copy_to_param_of(org_fn, obj, attr, value):
+    # inplace update tensor instead of replacing it
+    attrs = attr.split(".")
+    for name in attrs[:-1]:
+        obj = getattr(obj, name)
+    prev = getattr(obj, attrs[-1])
+
+    if prev.data.dtype == torch.int8 and prev.data.dtype != value.dtype:
+        return
+
+    prev.data.copy_(value)
+
+
+def cond_func(orig_func, *args, **kwargs):
+    return True
+
+
+comfy_utils_hijack = Hijacker()
+
+comfy_utils_hijack.register(
+    orig_func=copy_to_param, sub_func=copy_to_param_of, cond_func=cond_func
+)
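The new `hijack_utils.py` above uses onediff's `Hijacker` to swap `comfy.utils.copy_to_param` for an in-place variant: the substitute receives the original function as its first argument, and `cond_func` decides per call whether the substitution applies. A toy sketch of the same register pattern; `slow_add`/`fast_add` are made-up stand-ins, and the absolute import path for `Hijacker` is an assumption based on the relative imports shown above:

```python
# Toy illustration of the Hijacker pattern: sub_func gets the original
# function first, and cond_func gates the substitution per call.
from onediff_comfy_nodes.modules.sd_hijack_utils import Hijacker

def slow_add(a, b):
    return a + b

def fast_add(org_fn, a, b):
    if not isinstance(a, int):
        return org_fn(a, b)  # fall back to the original for unhandled cases
    return a + b             # "optimized" path

hijacker = Hijacker()
hijacker.register(
    orig_func=slow_add,
    sub_func=fast_add,
    cond_func=lambda org_fn, *args, **kwargs: True,
)
hijacker.hijack()  # from here on, callers of slow_add are routed to fast_add
```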
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py
index e1c91b7ba..32b668121 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py
@@ -7,8 +7,8 @@
 from nodes import *  # must imported before import comfy
 
-from onediff.infer_compiler.transform import register
-from onediff.infer_compiler.utils import is_community_version
+from onediff.infer_compiler.backends.oneflow.transform import register
+from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version
 
 from .attention import CrossAttention as CrossAttention1f
 from .attention import SpatialTransformer as SpatialTransformer1f
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py
index 3eb09d9fb..27bf9165a 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py
@@ -8,7 +8,7 @@
 import oneflow as torch
 import oneflow.nn as nn
 from einops import rearrange, repeat
-from onediff.infer_compiler.transform import proxy_class, transform_mgr
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr
 
 onediff_comfy = transform_mgr.transform_package("comfy")
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py
index 854eb9f85..e320170c0 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py
@@ -4,7 +4,7 @@
 import oneflow as torch
 import oneflow.nn as nn
 import oneflow.nn.functional as F
-from onediff.infer_compiler.transform import proxy_class
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class
 
 
 def Normalize(in_channels, num_groups=32):
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py
index cf6e54553..638b4b3cd 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py
@@ -1,5 +1,5 @@
 import oneflow as torch
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 transformed_comfy = transform_mgr.transform_package("comfy")
 proxy_ops = transformed_comfy.ops
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py
index 88cc98469..b8469004b 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py
@@ -4,7 +4,7 @@
 import oneflow as th  # 'th' is the way ComfyUI name the torch
 import oneflow.nn.functional as F
 from einops import rearrange
-from onediff.infer_compiler.transform import proxy_class, transform_mgr
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr
 
 onediff_comfy = transform_mgr.transform_package("comfy")
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py
index 86e822739..14f1a26d8 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py
@@ -1,7 +1,7 @@
 # ComfyUI/comfy/ldm/modules/diffusionmodules/model.py
 
 import oneflow as torch
-from onediff.infer_compiler.transform import transform_mgr
+from onediff.infer_compiler.backends.oneflow.transform import transform_mgr
 
 onediff_comfy = transform_mgr.transform_package("comfy")
diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py
index d05e8acb5..48f0de2a6 100644
--- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py
+++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py
@@ -1,6 +1,6 @@
 import onediff_quant
 import oneflow as flow
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 
 torch2oflow_class_map = {
     onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule,
diff --git a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py
index 5b8143605..80a242f2d 100644
--- a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py
+++ b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py
@@ -1,6 +1,6 @@
 from register_comfy.CrossAttentionPatch import is_crossAttention_patch
 
-from onediff.infer_compiler.utils import online_quantization_utils
+from onediff.infer_compiler.backends.oneflow import online_quantization_utils
 
 from .patch_executor import PatchExecutorBase
diff --git a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py
index ee42cc171..a70246405 100644
--- a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py
+++ b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py
@@ -5,8 +5,8 @@
 from comfy.model_base import BaseModel, SVD_img2vid
 from comfy.model_patcher import ModelPatcher
 
-from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule
-from onediff.infer_compiler.utils import set_boolean_env_var
+from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule
+from onediff.utils import set_boolean_env_var
 
 from ..patch_management import PatchType, create_patch_executor
diff --git a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py
index 96844fb2e..34acfe3b0 100644
--- a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py
+++ b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py
@@ -4,8 +4,8 @@
 from comfy import model_management
 from folder_paths import get_input_directory
 
 # onediff
-from onediff.infer_compiler import CompileOptions, oneflow_compile
-from onediff.infer_compiler.transform import torch2oflow
+from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile
+from onediff.infer_compiler.backends.oneflow.transform import torch2oflow
 from onediff.optimization.quant_optimizer import quantize_model
 
 # onediff_comfy_nodes
@@ -18,9 +18,9 @@
 def compoile_unet(diffusion_model, graph_file):
     print(f" OneDiffCheckpointLoaderSimple load_checkpoint file_path {graph_file}")
 
-    compile_options = CompileOptions()
-    compile_options.oneflow.graph_file = graph_file
-    compile_options.oneflow.graph_file_device = load_device
+    compile_options = OneflowCompileOptions()
+    compile_options.graph_file = graph_file
+    compile_options.graph_file_device = load_device
     diffusion_model = oneflow_compile(diffusion_model, options=compile_options)
     return diffusion_model
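Most hunks in this patch are mechanical import-path moves: oneflow-specific helpers go under `onediff.infer_compiler.backends.oneflow`, while backend-agnostic utilities move to `onediff.torch_utils` and `onediff.utils`. A compact old-to-new summary, collected from the hunks above and below:

```python
# Old path                                      -> new path (as changed in this patch)
# onediff.infer_compiler.transform              -> onediff.infer_compiler.backends.oneflow.transform
# onediff.infer_compiler.import_tools           -> onediff.infer_compiler.backends.oneflow.import_tools
# onediff.infer_compiler.oneflow                -> onediff.infer_compiler.backends.oneflow
# onediff.infer_compiler.utils (version checks) -> onediff.infer_compiler.backends.oneflow.utils.version_util
# onediff.infer_compiler.utils.module_operations -> onediff.torch_utils.module_operations
# onediff.infer_compiler.utils.log_utils.logger -> onediff.utils.logger
# onediff.infer_compiler.CompileOptions         -> onediff.infer_compiler.OneflowCompileOptions
from onediff.infer_compiler.backends.oneflow.transform import register, torch2oflow, transform_mgr
from onediff.utils import logger
```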
diff --git a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
index be22c7e64..6441673d6 100644
--- a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
+++ b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
@@ -33,7 +33,7 @@ def __init__(
         graph_device=None,
     ):
         from onediff.infer_compiler import (
-            CompileOptions,
+            OneflowCompileOptions,
             oneflow_compile,
             DeployableModule,
         )
@@ -49,10 +49,10 @@ def __init__(
                 "diffusion_model"
             ] = self.model.diffusion_model
         else:
-            options = CompileOptions()
-            options.oneflow.use_graph = use_graph
-            options.oneflow.graph_file = graph_path
-            options.oneflow.graph_file_device = graph_device
+            options = OneflowCompileOptions()
+            options.use_graph = use_graph
+            options.graph_file = graph_path
+            options.graph_file_device = graph_device
             self.model.__dict__["_modules"]["diffusion_model"] = oneflow_compile(
                 self.model.diffusion_model, options=options
             )
@@ -506,7 +506,7 @@ def __init__(
         gen_compile_options=None,
     ):
         from onediff.infer_compiler import (
-            CompileOptions,
+            OneflowCompileOptions,
             oneflow_compile,
             DeployableModule,
         )
@@ -525,14 +525,14 @@ def __init__(
             self.model.diffusion_model, cache_layer_id, cache_block_id
         )
         if use_graph:
-            gen_compile_options = gen_compile_options or (lambda x: CompileOptions())
+            gen_compile_options = gen_compile_options or (lambda x: OneflowCompileOptions())
             compile_options = gen_compile_options(self.deep_cache_unet)
-            compile_options.oneflow.use_graph = use_graph
+            compile_options.use_graph = use_graph
             self.deep_cache_unet = oneflow_compile(
                 self.deep_cache_unet, options=compile_options,
             )
             compile_options = gen_compile_options(self.fast_deep_cache_unet)
-            compile_options.oneflow.use_graph = use_graph
+            compile_options.use_graph = use_graph
             self.fast_deep_cache_unet = oneflow_compile(
                 self.fast_deep_cache_unet, options=compile_options,
             )
diff --git a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py
index c56ef3244..2c702e383 100644
--- a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py
+++ b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py
@@ -3,7 +3,7 @@
 import folder_paths
 import torch
 from comfy import model_management
-from onediff.infer_compiler import CompileOptions, oneflow_compile
+from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile
 
 from ..config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS
 from .graph_path import generate_graph_path
@@ -49,11 +49,11 @@ def onediff_load_quant_checkpoint_advanced(
     )
 
     if vae_speedup == "enable":
-        compile_options = CompileOptions()
-        compile_options.oneflow.graph_file = generate_graph_path(
+        compile_options = OneflowCompileOptions()
+        compile_options.graph_file = generate_graph_path(
             ckpt_name, vae.first_stage_model
         )
-        compile_options.oneflow.graph_file_device = model_management.get_torch_device()
+        compile_options.graph_file_device = model_management.get_torch_device()
         vae.first_stage_model = oneflow_compile(
             vae.first_stage_model, options=compile_options
         )
diff --git a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py
index 048a0312d..a14b15603 100644
--- a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py
+++ b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py
@@ -10,8 +10,7 @@
 from nodes import KSampler, VAEDecode
 from onediff.infer_compiler import oneflow_compile  # onediff
-from onediff.infer_compiler.utils.module_operations import (get_sub_module,
-                                                            modify_sub_module)
+from onediff.torch_utils.module_operations import (get_sub_module, modify_sub_module)
 from onediff_quant import Quantizer  # onediff_quant
 from onediff_quant.utils import (find_quantizable_modules, get_quantize_module,
diff --git a/onediff_diffusers_extensions/README.md b/onediff_diffusers_extensions/README.md
index 7ef564f6c..1704aeb63 100644
--- a/onediff_diffusers_extensions/README.md
+++ b/onediff_diffusers_extensions/README.md
@@ -208,7 +208,7 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
 )
 pipe.to("cuda")
 
-compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0
+compile_options.attention_allow_half_precision_score_accumulation_max_m = 0
 pipe = compile_pipe(pipe, options=compile_options)
 
 input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true")
diff --git a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py
index cbd7dc81f..ebdc5de2f 100644
--- a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py
+++ b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py
@@ -18,7 +18,7 @@
 from diffusers import EulerDiscreteScheduler
 from diffusers import utils
 
-from onediff.infer_compiler.utils.cost_util import cost_cnt
+from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt
 
 _MODEL_ID = "stabilityai/stable-diffusion-2"
diff --git a/onediff_diffusers_extensions/examples/pixart_alpha/README.md b/onediff_diffusers_extensions/examples/pixart_alpha/README.md
new file mode 100644
index 000000000..a63891526
--- /dev/null
+++ b/onediff_diffusers_extensions/examples/pixart_alpha/README.md
@@ -0,0 +1,53 @@
+# Run PixArt alpha (with nexfort backend)
+## Environment setup
+### Set up onediff
+https://github.com/siliconflow/onediff?tab=readme-ov-file#installation
+
+### Set up nexfort backend
+https://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort
+
+### Set up PixArt alpha
+HF model: https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
+
+HF pipeline: https://huggingface.co/docs/diffusers/main/en/api/pipelines/pixart
+
+## Run
+`model_id_or_path_to_PixArt-XL-2-1024-MS` is the model ID or local path of PixArt alpha, such as `/data/hf_models/PixArt-XL-2-1024-MS/`
+
+### Go to the onediff folder
+```
+cd onediff
+```
+
+### Run 1024*1024 without compile (the original PyTorch HF diffusers pipeline)
+```
+python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler none --output-image ./pixart_alpha.png
+```
+
+### Run 1024*1024 with compile
+```
+python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler nexfort --output-image ./pixart_alpha.png
+```
+
+## Performance comparison
+### nexfort compile config
+- compiler-config default is `{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}` in `/benchmarks/text_to_image.py`
+  - setting `--compiler-config '{"mode": "max-autotune", "memory_format": "channels_last"}'` will reduce compilation time to 57.863s and only slightly reduce the performance
+- fuse_qkv_projections: True
+
+### Metric
+| Metric                                           | NVIDIA A100-PCIE-40GB (1024 * 1024) |
+| ------------------------------------------------ | ----------------------------------- |
+| Data update date (yyyy-mm-dd)                    | 2024-05-23                          |
+| PyTorch iteration speed                          | 8.623it/s                           |
+| OneDiff iteration speed                          | 10.743it/s(+24.58%)                 |
+| PyTorch E2E time                                 | 2.568s                              |
+| OneDiff E2E time                                 | 1.992s(-22.4%)                      |
+| PyTorch Max Mem Used                             | 14.445GiB                           |
+| OneDiff Max Mem Used                             | 13.855GiB                           |
+| PyTorch Warmup with Run time                     | 4.100s                              |
+| OneDiff Warmup with Compilation time<sup>1</sup> | 115.309s                            |
+| OneDiff Warmup with Cache time                   | TODO                                |
+
+ <sup>1</sup> OneDiff Warmup with Compilation time is tested on Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz. Note this is just for reference, and it varies a lot on different CPUs.
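The PixArt README above drives everything through `benchmarks/text_to_image.py`. The same nexfort compile path can also be invoked programmatically via `compile_pipe`, exactly as this patch wires it (string JSON options, `fuse_qkv_projections=True`). A sketch assuming the local checkpoint path used in the README:

```python
# Programmatic equivalent of the README's nexfort run (checkpoint path assumed).
import torch
from diffusers import PixArtAlphaPipeline
from onediffx import compile_pipe

pipe = PixArtAlphaPipeline.from_pretrained(
    "/data/hf_models/PixArt-XL-2-1024-MS/", torch_dtype=torch.float16
).to("cuda")

# compile_pipe accepts the options as a JSON string for the nexfort backend
options = '{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}'
pipe = compile_pipe(pipe, backend="nexfort", options=options, fuse_qkv_projections=True)

image = pipe("a small cactus with a happy face", num_inference_steps=20).images[0]
image.save("pixart_alpha.png")
```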
diff --git a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py
index a6f323998..2081fbaa6 100644
--- a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py
+++ b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 
 # oneflow_compile should be imported before importing any diffusers
-from onediff.infer_compiler import oneflow_compile, compile_options
+from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
 
 
 def parse_args():
@@ -110,7 +110,8 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-compile_options.oneflow.use_graph = args.graph
+compile_options = OneflowCompileOptions()
+compile_options.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
diff --git a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py
index e5b150052..e42b47071 100644
--- a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py
+++ b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py
@@ -2,7 +2,7 @@
 import time
 import argparse
 
-from onediff.infer_compiler import oneflow_compile, compile_options
+from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
 
 import torch
 import torch.nn as nn
@@ -92,7 +92,8 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-compile_options.oneflow.use_graph = args.graph
+compile_options = OneflowCompileOptions()
+compile_options.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py
index 859b2e491..5a164d239 100644
--- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py
+++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 
 # oneflow_compile should be imported before importing any diffusers
-from onediff.infer_compiler import oneflow_compile, compile_options
+from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
 
 
 def parse_args():
@@ -90,7 +90,8 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-compile_options.oneflow.use_graph = args.graph
+compile_options = OneflowCompileOptions()
+compile_options.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py
index aa5d86058..06d16c81f 100644
--- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py
+++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from diffusers import DiffusionPipeline
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.utils import TensorInplaceAssign
+from onediff.torch_utils import TensorInplaceAssign
 
 try:
     from onediffx.lora import load_and_fuse_lora, unfuse_lora, update_graph_with_constant_folding_info
diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py
index ae9488221..0da27858f 100644
--- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py
+++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py
@@ -7,7 +7,7 @@
 import torch
 
 import oneflow as flow
-from onediff.infer_compiler import oneflow_compile, compile_options
+from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
 from diffusers import DiffusionPipeline
 
 parser = argparse.ArgumentParser()
@@ -47,7 +47,8 @@
 # Compile unet and vae
 print("unet and vae is compiled to oneflow.")
-compile_options.oneflow.max_cached_graph_size = cmd_args.num_dynamic_input_size
+compile_options = OneflowCompileOptions()
+compile_options.max_cached_graph_size = cmd_args.num_dynamic_input_size
 base.unet = oneflow_compile(base.unet, options=compile_options)
 base.vae.decoder = oneflow_compile(base.vae.decoder, options=compile_options)
diff --git a/onediff_diffusers_extensions/onediffx/__init__.py b/onediff_diffusers_extensions/onediffx/__init__.py
index 2da48e8f8..532dad12c 100644
--- a/onediff_diffusers_extensions/onediffx/__init__.py
+++ b/onediff_diffusers_extensions/onediffx/__init__.py
@@ -1,5 +1,5 @@
 __version__ = "1.1.0.dev1"
-from onediff.infer_compiler import compile_options
 from .compilers.diffusion_pipeline_compiler import compile_pipe, save_pipe, load_pipe
+from onediff.infer_compiler import OneflowCompileOptions
 
-__all__ = ["compile_pipe", "compile_options", "save_pipe", "load_pipe"]
+__all__ = ["compile_pipe", "save_pipe", "load_pipe", "OneflowCompileOptions"]
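The save/load example leans on `max_cached_graph_size` to keep compiled graphs for several input shapes. A condensed sketch of that setup, assuming `base` is an SDXL `DiffusionPipeline` already moved to CUDA:

```python
from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions

compile_options = OneflowCompileOptions()
# Cache up to 9 compiled graphs so switching between up to 9 input
# sizes does not trigger recompilation.
compile_options.max_cached_graph_size = 9

base.unet = oneflow_compile(base.unet, options=compile_options)
base.vae.decoder = oneflow_compile(base.vae.decoder, options=compile_options)
```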
diff --git a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py
index 3307991e3..dbd784367 100644
--- a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py
+++ b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py
@@ -1,7 +1,7 @@
 import os
 import torch
 from onediff.infer_compiler import compile, DeployableModule
-from onediff.infer_compiler.utils.log_utils import logger
+from onediff.utils import logger
 
 def _recursive_getattr(obj, attr, default=None):
@@ -29,11 +29,11 @@ def _recursive_setattr(obj, attr, value):
     "fast_unet",  # for deepcache
     "prior",  # for StableCascadePriorPipeline
     "decoder",  # for StableCascadeDecoderPipeline
+    "transformer",  # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline
     "vqgan.down_blocks",  # for StableCascadeDecoderPipeline
     "vqgan.up_blocks",  # for StableCascadeDecoderPipeline
     "vae.decoder",
     "vae.encoder",
-    "transformer",  # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline
 ]
 
@@ -52,8 +52,20 @@ def _filter_parts(ignores=()):
 def compile_pipe(
-    pipe, *, backend="oneflow", options=None, ignores=(),
+    pipe, *, backend="oneflow", options=None, ignores=(), fuse_qkv_projections=False,
 ):
+    if fuse_qkv_projections:
+        pipe = fuse_qkv_projections_in_pipe(pipe)
+
+    if backend == "nexfort" and isinstance(options, str):
+        import json
+
+        options = json.loads(options)
+
+    if backend == "nexfort" and options is not None and "memory_format" in options:
+        memory_format = getattr(torch, options["memory_format"])
+        pipe = convert_pipe_to_memory_format(pipe, ignores=ignores, memory_format=memory_format)
+        del options["memory_format"]
+
     # To fix the bug of graph load of vae. Please refer to: https://github.com/siliconflow/onediff/issues/452
     if (
         hasattr(pipe, "upcast_vae")
@@ -82,6 +94,33 @@ def compile_pipe(
     return pipe
 
+
+def fuse_qkv_projections_in_pipe(pipe):
+    if hasattr(pipe, "fuse_qkv_projections"):
+        pipe.fuse_qkv_projections()
+    return pipe
+
+
+def convert_pipe_to_memory_format(pipe, *, ignores=(), memory_format=torch.preserve_format):
+    from nexfort.utils.attributes import multi_recursive_apply
+    from nexfort.utils.memory_format import apply_memory_format
+    import functools
+
+    if memory_format == torch.preserve_format:
+        return pipe
+
+    parts = [
+        "unet",
+        "controlnet",
+        "fast_unet",  # for deepcache
+        "prior",  # for StableCascadePriorPipeline
+        "decoder",  # for StableCascadeDecoderPipeline
+        "transformer",  # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline
+        "vqgan",  # for StableCascadeDecoderPipeline
+        "vae",
+    ]
+    multi_recursive_apply(
+        pipe, parts, functools.partial(apply_memory_format, memory_format=memory_format), ignores=ignores, verbose=True
+    )
+    return pipe
 
 def save_pipe(pipe, dir="cached_pipe", *, ignores=(), overwrite=True):
     if not os.path.exists(dir):
diff --git a/onediff_diffusers_extensions/onediffx/lora/__init__.py b/onediff_diffusers_extensions/onediffx/lora/__init__.py
index 24b78f93d..5d99001bc 100644
--- a/onediff_diffusers_extensions/onediffx/lora/__init__.py
+++ b/onediff_diffusers_extensions/onediffx/lora/__init__.py
@@ -6,4 +6,4 @@
     get_active_adapters,
 )
 
-from onediff.infer_compiler.utils.param_utils import update_graph_with_constant_folding_info
+from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_with_constant_folding_info
diff --git a/onediff_diffusers_extensions/onediffx/lora/lora.py b/onediff_diffusers_extensions/onediffx/lora/lora.py
index f5bb290b4..8e7896094 100644
--- a/onediff_diffusers_extensions/onediffx/lora/lora.py
+++ b/onediff_diffusers_extensions/onediffx/lora/lora.py
@@ -5,7 +5,7 @@
 
 import torch
 
-from onediff.infer_compiler.utils.log_utils import logger
+from onediff.utils import logger
 
 import diffusers
 from diffusers.loaders import LoraLoaderMixin
diff --git a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py
index a0bdf76d0..df8f17ebe 100644
--- a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py
+++ b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py
@@ -19,7 +19,7 @@
 from diffusers.utils import is_accelerate_available
 from diffusers.models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
 
-from onediff.infer_compiler.utils.log_utils import logger
+from onediff.utils import logger
 
 from .utils import fuse_lora, get_adapter_names
diff --git a/onediff_diffusers_extensions/onediffx/lora/unet.py b/onediff_diffusers_extensions/onediffx/lora/unet.py
index cca033aa1..98834eeaa 100644
--- a/onediff_diffusers_extensions/onediffx/lora/unet.py
+++ b/onediff_diffusers_extensions/onediffx/lora/unet.py
@@ -4,7 +4,7 @@
 import torch
 
 from onediff.infer_compiler import DeployableModule
-from onediff.infer_compiler.utils.log_utils import logger
+from onediff.utils import logger
 
 from diffusers.models.lora import (
     LoRACompatibleConv,
     LoRACompatibleLinear,
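With `transformer` in the parts list, `compile_pipe` now picks up DiT-style pipelines automatically, and the `ignores` argument filters any entry back out. A short sketch (the model id is illustrative):

```python
import torch
from diffusers import StableDiffusionXLPipeline
from onediffx import compile_pipe

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Entries in `ignores` match the parts list above, so the VAE stays eager
# while the unet is still compiled.
pipe = compile_pipe(pipe, ignores=("vae.encoder", "vae.decoder"))
```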
diff --git a/onediff_diffusers_extensions/onediffx/lora/utils.py b/onediff_diffusers_extensions/onediffx/lora/utils.py
index 49fe2aca2..89b029d45 100644
--- a/onediff_diffusers_extensions/onediffx/lora/utils.py
+++ b/onediff_diffusers_extensions/onediffx/lora/utils.py
@@ -14,13 +14,13 @@
 else:
     is_peft_available = lambda: False
 
-from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor
+from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_related_tensor
 
 if version.parse(diffusers.__version__) <= version.parse("0.20.0"):
     from diffusers.loaders import PatchedLoraProjection
 else:
     from diffusers.models.lora import PatchedLoraProjection
 
-from onediff.infer_compiler.oneflow.dual_module import DualModule
+from onediff.infer_compiler.backends.oneflow.dual_module import DualModule
 
 if version.parse(diffusers.__version__) <= version.parse("0.20.0"):
     from diffusers.loaders import PatchedLoraProjection
diff --git a/onediff_diffusers_extensions/tests/profile_lora.py b/onediff_diffusers_extensions/tests/profile_lora.py
index 1bf310aee..1ecdc3535 100644
--- a/onediff_diffusers_extensions/tests/profile_lora.py
+++ b/onediff_diffusers_extensions/tests/profile_lora.py
@@ -7,7 +7,7 @@
 from diffusers import DiffusionPipeline
 
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.utils import TensorInplaceAssign
+from onediff.torch_utils import TensorInplaceAssign
 from onediffx.lora import load_and_fuse_lora, unfuse_lora
 
 _time = None
diff --git a/onediff_diffusers_extensions/tests/profile_multi_lora.py b/onediff_diffusers_extensions/tests/profile_multi_lora.py
index 88b3d7cde..e50b6a750 100644
--- a/onediff_diffusers_extensions/tests/profile_multi_lora.py
+++ b/onediff_diffusers_extensions/tests/profile_multi_lora.py
@@ -8,7 +8,7 @@
 from diffusers.utils.constants import USE_PEFT_BACKEND
 
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.utils import TensorInplaceAssign
+from onediff.torch_utils import TensorInplaceAssign
 from onediffx.lora import load_and_fuse_lora, unfuse_lora, set_and_fuse_adapters
 
 if not USE_PEFT_BACKEND:
diff --git a/onediff_sd_webui_extensions/api_examples/img2img.py b/onediff_sd_webui_extensions/api_examples/img2img.py
index 4b5e6ee79..1512ba8b8 100644
--- a/onediff_sd_webui_extensions/api_examples/img2img.py
+++ b/onediff_sd_webui_extensions/api_examples/img2img.py
@@ -6,13 +6,13 @@
 # And if you are using OneDiff Enterprise, add another
 # `"script_args" : [{"0": True}]` field to enable quantization
 
-from datetime import datetime
-from pathlib import Path
-import urllib.request
 import base64
 import json
-import time
 import os
+import time
+import urllib.request
+from datetime import datetime
+from pathlib import Path
 
 webui_server_url = "http://127.0.0.1:7860"
diff --git a/onediff_sd_webui_extensions/api_examples/txt2img.py b/onediff_sd_webui_extensions/api_examples/txt2img.py
index 2ba72960e..9cb4b2be5 100644
--- a/onediff_sd_webui_extensions/api_examples/txt2img.py
+++ b/onediff_sd_webui_extensions/api_examples/txt2img.py
@@ -6,12 +6,12 @@
 # And if you are using OneDiff Enterprise, add another
 # `"script_args" : [{"0": True}]` field to enable quantization
 
-from datetime import datetime
-import urllib.request
 import base64
 import json
-import time
 import os
+import time
+import urllib.request
+from datetime import datetime
 
 webui_server_url = "http://127.0.0.1:7860"
@@ -57,7 +57,6 @@ def call_txt2img_api(**payload):
         "width": 1024,
         "height": 1024,
         "cfg_scale": 7,
-        "sampler_name": "DPM++ 2M Karras",
         "n_iter": 1,
         "batch_size": 1,
         # Enable OneDiff speed up
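Most of the remaining hunks repeat one mechanical change: utilities moved out of `onediff.infer_compiler.utils`. For reference, the new locations as they appear throughout this patch:

```python
from onediff.utils import logger, parse_boolean_from_env
from onediff.torch_utils import TensorInplaceAssign
from onediff.infer_compiler.backends.oneflow.param_utils import (
    update_graph_related_tensor,
    update_graph_with_constant_folding_info,
)
from onediff.infer_compiler.backends.oneflow.dual_module import DualModule
from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt
from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
```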
diff --git a/onediff_sd_webui_extensions/compile_ldm.py b/onediff_sd_webui_extensions/compile_ldm.py
index e6a3aec06..e87f7f696 100644
--- a/onediff_sd_webui_extensions/compile_ldm.py
+++ b/onediff_sd_webui_extensions/compile_ldm.py
@@ -1,8 +1,6 @@
 import os
 
-import oneflow as flow
-from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import proxy_class, register
+import oneflow as flow
 from ldm.modules.attention import (
     BasicTransformerBlock,
     CrossAttention,
@@ -17,6 +15,9 @@
     timestep_embedding,
 )
 
+from onediff.infer_compiler import oneflow_compile
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
+
 __all__ = ["compile_ldm_unet"]
diff --git a/onediff_sd_webui_extensions/compile_sgm.py b/onediff_sd_webui_extensions/compile_sgm.py
index 12398a737..154b3dc5c 100644
--- a/onediff_sd_webui_extensions/compile_sgm.py
+++ b/onediff_sd_webui_extensions/compile_sgm.py
@@ -1,16 +1,20 @@
 import oneflow as flow
-from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import proxy_class, register
 from sd_webui_onediff_utils import (
     CrossAttentionOflow,
     GroupNorm32Oflow,
     timestep_embedding,
 )
-from sgm.modules.attention import CrossAttention, SpatialTransformer
-from sgm.modules.diffusionmodules.openaimodel import UNetModel, ResBlock
-from sgm.modules.attention import BasicTransformerBlock
+from sgm.modules.attention import (
+    BasicTransformerBlock,
+    CrossAttention,
+    SpatialTransformer,
+)
+from sgm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel
 from sgm.modules.diffusionmodules.util import GroupNorm32
 
+from onediff.infer_compiler import oneflow_compile
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
+
 __all__ = ["compile_sgm_unet"]
diff --git a/onediff_sd_webui_extensions/compile_vae.py b/onediff_sd_webui_extensions/compile_vae.py
index d5c9c7f26..f3dd03204 100644
--- a/onediff_sd_webui_extensions/compile_vae.py
+++ b/onediff_sd_webui_extensions/compile_vae.py
@@ -1,22 +1,27 @@
 from modules import shared
-from modules.sd_vae_approx import model as get_vae_model, sd_vae_approx_models
 from modules.sd_vae_approx import VAEApprox
+from modules.sd_vae_approx import model as get_vae_model
+from modules.sd_vae_approx import sd_vae_approx_models
+
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import proxy_class, register
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
 
 __all__ = ["VaeCompileCtx"]
 
 compiled_models = {}
 
+
 class VAEApproxOflow(proxy_class(VAEApprox)):
     pass
 
+
 torch2oflow_class_map = {
     VAEApprox: VAEApproxOflow,
 }
 register(package_names=["modules"], torch2oflow_class_map=torch2oflow_class_map)
 
+
 class VaeCompileCtx(object):
     def __init__(self, options=None):
         self._options = options
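compile_vae shows the registration idiom in full. Distilled, the pattern for teaching the oneflow backend about any custom torch module is sketched below; the class and package names here are hypothetical stand-ins for VAEApprox and `modules`:

```python
import torch
from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register

class MyBlock(torch.nn.Module):  # hypothetical torch-side module
    def forward(self, x):
        return x * 2

class MyBlockOflow(proxy_class(MyBlock)):  # auto-derived oneflow counterpart
    pass

register(
    package_names=["my_package"],  # hypothetical package owning MyBlock
    torch2oflow_class_map={MyBlock: MyBlockOflow},
)
```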
diff --git a/onediff_sd_webui_extensions/onediff_hijack.py b/onediff_sd_webui_extensions/onediff_hijack.py
index f2683ac42..c8da677c6 100644
--- a/onediff_sd_webui_extensions/onediff_hijack.py
+++ b/onediff_sd_webui_extensions/onediff_hijack.py
@@ -1,6 +1,7 @@
-import oneflow
 import compile_ldm
 import compile_sgm
+import oneflow
+
 
 # https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/1c0a0c4c26f78c32095ebc7f8af82f5c04fca8c0/modules/sd_hijack_unet.py#L8
 class OneFlowHijackForUnet:
@@ -8,12 +9,15 @@ class OneFlowHijackForUnet:
     This is oneflow, but with cat that resizes tensors to appropriate dimensions if they do not match;
     this makes it possible to create pictures with dimensions that are multiples of 8 rather than 64
     """
+
     def __getattr__(self, item):
-        if item == 'cat':
+        if item == "cat":
             return self.cat
         if hasattr(oneflow, item):
             return getattr(oneflow, item)
-        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{item}'"
+        )
 
     def cat(self, tensors, *args, **kwargs):
         if len(tensors) == 2:
@@ -22,11 +26,13 @@ def cat(self, tensors, *args, **kwargs):
             tensors = (a, b)
         return oneflow.cat(tensors, *args, **kwargs)
 
+
 hijack_flow = OneFlowHijackForUnet()
 
+
 def unload_model_weights(sd_model=None, info=None):
-    from modules import lowvram, devices
-    from modules import shared
+    from modules import devices, lowvram, shared
+
     m = sd_model or shared.sd_model
     if m.lowvram:
         lowvram.send_everything_to_cpu()
@@ -35,10 +41,12 @@ def unload_model_weights(sd_model=None, info=None):
     devices.torch_gc()
     return sd_model
 
+
 def send_model_to_cpu(m):
     # do nothing
     pass
 
+
 def hijack_function(module, name, new_name, new_value):
     # restore original function in case of reload
     unhijack_function(module=module, name=name, new_name=new_name)
@@ -51,35 +59,39 @@ def unhijack_function(module, name, new_name):
         setattr(module, name, getattr(module, new_name))
         delattr(module, new_name)
 
+
 def do_hijack():
     compile_ldm.flow = hijack_flow
     compile_sgm.flow = hijack_flow
-    from modules import sd_models, script_callbacks
+    from modules import script_callbacks, sd_models
+
     script_callbacks.on_script_unloaded(undo_hijack)
     hijack_function(
         module=sd_models,
-        name='unload_model_weights',
-        new_name='__onediff_original_unload_model_weights',
+        name="unload_model_weights",
+        new_name="__onediff_original_unload_model_weights",
         new_value=unload_model_weights,
     )
     hijack_function(
         module=sd_models,
-        name='send_model_to_cpu',
-        new_name='__onediff_original_send_model_to_cpu',
+        name="send_model_to_cpu",
+        new_name="__onediff_original_send_model_to_cpu",
         new_value=send_model_to_cpu,
     )
 
+
 def undo_hijack():
     compile_ldm.flow = oneflow
     compile_sgm.flow = oneflow
     from modules import sd_models
+
     unhijack_function(
         module=sd_models,
-        name='unload_model_weights',
-        new_name='__onediff_original_unload_model_weights',
+        name="unload_model_weights",
+        new_name="__onediff_original_unload_model_weights",
     )
     unhijack_function(
         module=sd_models,
-        name='send_model_to_cpu',
-        new_name='__onediff_original_send_model_to_cpu',
+        name="send_model_to_cpu",
+        new_name="__onediff_original_send_model_to_cpu",
     )
diff --git a/onediff_sd_webui_extensions/onediff_lora.py b/onediff_sd_webui_extensions/onediff_lora.py
index 77066873f..0bee88e9d 100644
--- a/onediff_sd_webui_extensions/onediff_lora.py
+++ b/onediff_sd_webui_extensions/onediff_lora.py
@@ -1,6 +1,9 @@
 import torch
+
 from onediff.infer_compiler import DeployableModule
-from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor
+from onediff.infer_compiler.backends.oneflow.param_utils import (
+    update_graph_related_tensor,
+)
 
 class HijackLoraActivate:
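How the hijack behaves in practice, sketched under the assumption that it runs inside the webui process (where `onediff_hijack` and `oneflow` are importable):

```python
import oneflow
from onediff_hijack import hijack_flow

# Ordinary attributes fall through to the real oneflow module...
assert hijack_flow.float32 is oneflow.float32

# ...while cat is intercepted, so two skip-connection tensors whose spatial
# sizes disagree (inputs not a multiple of 64) get resized before concatenation.
x = oneflow.randn(1, 4, 32, 32)
y = oneflow.randn(1, 4, 32, 32)
z = hijack_flow.cat((x, y), 1)
```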
diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py
index 3c7e887cd..5e5766c04 100644
--- a/onediff_sd_webui_extensions/scripts/onediff.py
+++ b/onediff_sd_webui_extensions/scripts/onediff.py
@@ -1,37 +1,42 @@
 import os
-import zipfile
 import warnings
-import gradio as gr
+import zipfile
 from pathlib import Path
-from typing import Union, Dict
+from typing import Dict, Union
+
+import gradio as gr
 import modules.scripts as scripts
 import modules.shared as shared
-from modules.sd_models import select_checkpoint
-from modules.processing import process_images
-from modules.ui_common import create_refresh_button
-from modules import script_callbacks
-
-from ui_utils import hints_message, get_all_compiler_caches, refresh_all_compiler_caches, all_compiler_caches_path
-from compile_ldm import compile_ldm_unet, SD21CompileCtx
+from compile_ldm import SD21CompileCtx, compile_ldm_unet
 from compile_sgm import compile_sgm_unet
 from compile_vae import VaeCompileCtx
-from onediff_lora import HijackLoraActivate
+from modules import script_callbacks
+from modules.processing import process_images
+from modules.sd_models import select_checkpoint
+from modules.ui_common import create_refresh_button
 from onediff_hijack import do_hijack as onediff_do_hijack
+from onediff_lora import HijackLoraActivate
+from oneflow import __version__ as oneflow_version
+from ui_utils import (
+    all_compiler_caches_path,
+    get_all_compiler_caches,
+    hints_message,
+    refresh_all_compiler_caches,
+)
 
-from onediff.infer_compiler.utils.log_utils import logger
+from onediff import __version__ as onediff_version
 from onediff.optimization.quant_optimizer import (
     quantize_model,
     varify_can_use_quantization,
 )
-from onediff.infer_compiler.utils.env_var import parse_boolean_from_env
-from onediff import __version__ as onediff_version
-from oneflow import __version__ as oneflow_version
+from onediff.utils import logger, parse_boolean_from_env
 
 """oneflow_compiled UNetModel"""
 compiled_unet = None
 is_unet_quantized = False
 compiled_ckpt_name = None
 
+
 def generate_graph_path(ckpt_name: str, model_name: str) -> str:
     base_output_dir = shared.opts.outdir_samples or shared.opts.outdir_txt2img_samples
     save_ckpt_graphs_path = os.path.join(base_output_dir, "graphs", ckpt_name)
@@ -119,14 +124,29 @@ def ui(self, is_img2img):
         """
         with gr.Row():
             # TODO: set choices as Tuple[str, str] after the version of gradio specified webui upgrades
-            compiler_cache = gr.Dropdown(label="Compiler caches (Beta)", choices=["None"] + get_all_compiler_caches(), value="None", elem_id="onediff_compiler_cache")
-            refresh_button = create_refresh_button(compiler_cache, refresh_all_compiler_caches, lambda: {"choices": ["None"] + get_all_compiler_caches()}, "onediff_refresh_compiler_caches")
+            compiler_cache = gr.Dropdown(
+                label="Compiler caches (Beta)",
+                choices=["None"] + get_all_compiler_caches(),
+                value="None",
+                elem_id="onediff_compiler_cache",
+            )
+            create_refresh_button(
+                compiler_cache,
+                refresh_all_compiler_caches,
+                lambda: {"choices": ["None"] + get_all_compiler_caches()},
+                "onediff_refresh_compiler_caches",
+            )
         save_cache_name = gr.Textbox(label="Saved cache name (Beta)")
         with gr.Row():
-            always_recompile = gr.components.Checkbox(label="always_recompile", visible=parse_boolean_from_env("ONEDIFF_DEBUG"))
-            if not varify_can_use_quantization():
-                gr.HTML(hints_message)
-            is_quantized = gr.components.Checkbox(label="Model Quantization(int8) Speed Up", visible=varify_can_use_quantization())
+            always_recompile = gr.components.Checkbox(
+                label="always_recompile",
+                visible=parse_boolean_from_env("ONEDIFF_DEBUG"),
+            )
+            gr.HTML(hints_message, elem_id="hintMessage", visible=not varify_can_use_quantization())
+            is_quantized = gr.components.Checkbox(
+                label="Model Quantization(int8) Speed Up",
+                visible=varify_can_use_quantization(),
+            )
         return [is_quantized, compiler_cache, save_cache_name, always_recompile]
 
     def show(self, is_img2img):
@@ -143,7 +163,7 @@ def get_model_type(model):
                 "is_ssd": model.is_ssd,
             }
 
-        if self.current_type == None:
+        if self.current_type is None:
             is_changed = True
         else:
             for key, v in self.current_type.items():
@@ -151,11 +171,18 @@
                     is_changed = True
                     break
 
-        if is_changed == True:
+        if is_changed is True:
             self.current_type = get_model_type(model)
 
         return is_changed
 
-    def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", always_recompile=False):
+    def run(
+        self,
+        p,
+        quantization=False,
+        compiler_cache=None,
+        saved_cache_name="",
+        always_recompile=False,
+    ):
         global compiled_unet, compiled_ckpt_name, is_unet_quantized
 
         current_checkpoint = shared.opts.sd_model_checkpoint
@@ -165,9 +192,11 @@
         model_changed = self.check_model_change(shared.sd_model)
         quantization_changed = quantization != is_unet_quantized
         need_recompile = (
-            (quantization and ckpt_changed)  # always recompile when switching ckpt with 'int8 speed model' enabled
-            or model_changed  # always recompile when switching model to another structure
-            or quantization_changed  # always recompile when switching model from non-quantized to quantized (and vice versa)
+            (
+                quantization and ckpt_changed
+            )  # always recompile when switching ckpt with 'int8 speed model' enabled
+            or model_changed  # always recompile when switching model to another structure
+            or quantization_changed  # always recompile when switching model from non-quantized to quantized (and vice versa)
             or always_recompile
         )
 
@@ -178,16 +207,23 @@
                 original_diffusion_model, quantization=quantization
             )
 
-            if compiler_cache != "None":
+            # Due to the version of gradio compatible with sd-webui, the CompilerCache dropdown box always returns a string
+            if compiler_cache not in [None, "None"]:
                 compiler_cache_path = all_compiler_caches_path() + f"/{compiler_cache}"
                 if not Path(compiler_cache_path).exists():
-                    raise FileNotFoundError(f"Cannot find cache {compiler_cache_path}, please make sure it exists")
+                    raise FileNotFoundError(
+                        f"Cannot find cache {compiler_cache_path}, please make sure it exists"
+                    )
                 try:
                     compiled_unet.load_graph(compiler_cache_path, run_warmup=True)
-                except zipfile.BadZipFile as e:
-                    raise RuntimeError("Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui")
+                except zipfile.BadZipFile:
+                    raise RuntimeError(
+                        "Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui"
+                    )
                 except Exception as e:
-                    raise RuntimeError("Load cache failed. Please make sure cache has the same sd version (or unet architure) with current checkpoint")
+                    raise RuntimeError(
+                        f"Load cache failed ({e}). Please make sure the cache has the same sd version (or unet architecture) as the current checkpoint"
+                    )
             else:
                 logger.info(
@@ -199,8 +235,10 @@
             if saved_cache_name != "":
                 if not os.access(str(all_compiler_caches_path()), os.W_OK):
-                    raise PermissionError(f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. \
-                        Please change it in the settings to a directory with write permissions")
+                    raise PermissionError(
+                        f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. \
+                        Please change it in the settings to a directory with write permissions"
+                    )
                 if not Path(all_compiler_caches_path()).exists():
                     Path(all_compiler_caches_path()).mkdir()
                 saved_cache_name = all_compiler_caches_path() + f"/{saved_cache_name}"
@@ -209,10 +247,18 @@
         return proc
 
+
 def on_ui_settings():
-    section = ('onediff', "OneDiff")
-    shared.opts.add_option("onediff_compiler_caches_path", shared.OptionInfo(
-        str(Path(__file__).parent.parent / "compiler_caches"), "Directory for onediff compiler caches", section=section))
+    section = ("onediff", "OneDiff")
+    shared.opts.add_option(
+        "onediff_compiler_caches_path",
+        shared.OptionInfo(
+            str(Path(__file__).parent.parent / "compiler_caches"),
+            "Directory for onediff compiler caches",
+            section=section,
+        ),
+    )
+
 
 script_callbacks.on_ui_settings(on_ui_settings)
 
 onediff_do_hijack()
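A hedged sketch of the cache round trip that run() implements; `save_graph` as the counterpart of `load_graph` is assumed from the surrounding flow rather than shown in this hunk:

```python
# `compiled_unet` is the DeployableModule produced by the quantize/compile step,
# and `all_compiler_caches_path` comes from ui_utils as imported above.
cache_path = all_compiler_caches_path() + "/my_cache"

# Loading requires webui to be started with --disable-safe-unpickle.
compiled_unet.load_graph(cache_path, run_warmup=True)

# Assumed counterpart, used when the "Saved cache name" textbox is filled in.
compiled_unet.save_graph(cache_path)
```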
diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py
index 3b95b837c..19378d59c 100644
--- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py
+++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py
@@ -4,7 +4,10 @@
 # *Only* converts the UNet, VAE, and Text Encoder.
 # Does not convert optimizer state or any other thing.
 
-__all__ = ["convert_sd", "convert_unet_calibrate_info_sd"]
+__all__ = [
+    # "convert_sd",
+    "convert_unet_calibrate_info_sd",
+]
 
 import argparse
 import os.path as osp
@@ -14,7 +17,6 @@
 import torch
 from safetensors.torch import load_file, save_file
 
-
 # =================#
 # UNet Conversion #
 # =================#
@@ -304,6 +306,7 @@ def convert_text_enc_state_dict_v20(text_enc_dict):
 def convert_text_enc_state_dict(text_enc_dict):
     return text_enc_dict
 
+
 def convert_unet_calibrate_dict(state_dict) -> str:
     mapping = {k: k for k in state_dict}
     remove_suffix = (
@@ -345,14 +348,31 @@
         for name, info in dst_info.items():
             f.write(f"{name} {info}\n")
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
-    parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.")
-    parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.")
-    parser.add_argument("--half", action="store_true", help="Save weights in half precision.")
     parser.add_argument(
-        "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt."
+        "--model_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the model to convert.",
+    )
+    parser.add_argument(
+        "--checkpoint_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the output model.",
+    )
+    parser.add_argument(
+        "--half", action="store_true", help="Save weights in half precision."
+    )
+    parser.add_argument(
+        "--use_safetensors",
+        action="store_true",
+        help="Save weights use safetensors, default is ckpt.",
     )
 
     args = parser.parse_args()
@@ -387,7 +407,9 @@
     # Convert the UNet model
     unet_state_dict = convert_unet_state_dict(unet_state_dict)
-    unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()}
+    unet_state_dict = {
+        "model.diffusion_model." + k: v for k, v in unet_state_dict.items()
+    }
 
     # Convert the VAE model
     vae_state_dict = convert_vae_state_dict(vae_state_dict)
@@ -400,10 +422,14 @@
         # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm
         text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()}
         text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict)
-        text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()}
+        text_enc_dict = {
+            "cond_stage_model.model." + k: v for k, v in text_enc_dict.items()
+        }
     else:
         text_enc_dict = convert_text_enc_state_dict(text_enc_dict)
-        text_enc_dict = {"cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()}
+        text_enc_dict = {
+            "cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()
+        }
 
     # Put together new checkpoint
     state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict}
@@ -416,8 +442,13 @@
         state_dict = {"state_dict": state_dict}
     torch.save(state_dict, args.checkpoint_path)
 
-    calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt"
-    convert_unet_calibrate_info_sd(args.model_path + "/calibrate_info.txt", calibrate_info_save_path)
+    calibrate_info_save_path = (
+        Path(args.checkpoint_path).parent
+        / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt"
+    )
+    convert_unet_calibrate_info_sd(
+        args.model_path + "/calibrate_info.txt", calibrate_info_save_path
+    )
 
 # def get_unet_state_dict(model_path):
 #     unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors")
@@ -490,4 +521,3 @@
 #     else:
 #         state_dict = {"state_dict": state_dict}
 #         torch.save(state_dict, checkpoint_path)
-
diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py
index 7284c9293..e97c5849f 100644
--- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py
+++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py
@@ -4,7 +4,10 @@
 # *Only* converts the UNet, VAE, and Text Encoder.
 # Does not convert optimizer state or any other thing.
 
-__all__ = ["convert_sdxl", "convert_unet_calibrate_info_sdxl"]
+__all__ = [
+    # "convert_sdxl",
+    "convert_unet_calibrate_info_sdxl",
+]
 
 import argparse
 import os.path as osp
@@ -14,7 +17,6 @@
 import torch
 from safetensors.torch import load_file, save_file
 
-
 # =================#
 # UNet Conversion #
 # =================#
@@ -285,6 +287,7 @@ def convert_openclip_text_enc_state_dict(text_enc_dict):
 def convert_openai_text_enc_state_dict(text_enc_dict):
     return text_enc_dict
 
+
 def convert_unet_calibrate_dict(state_dict) -> str:
     mapping = {k: k for k in state_dict}
     remove_suffix = (
@@ -333,11 +336,27 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
-    parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.")
-    parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.")
-    parser.add_argument("--half", action="store_true", help="Save weights in half precision.")
     parser.add_argument(
-        "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt."
+        "--model_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the model to convert.",
+    )
+    parser.add_argument(
+        "--checkpoint_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the output model.",
+    )
+    parser.add_argument(
+        "--half", action="store_true", help="Save weights in half precision."
+    )
+    parser.add_argument(
+        "--use_safetensors",
+        action="store_true",
+        help="Save weights use safetensors, default is ckpt.",
     )
 
     args = parser.parse_args()
@@ -374,12 +393,16 @@
     if osp.exists(text_enc_2_path):
         text_enc_2_dict = load_file(text_enc_2_path, device="cpu")
     else:
-        text_enc_2_path = osp.join(args.model_path, "text_encoder_2", "pytorch_model.bin")
+        text_enc_2_path = osp.join(
+            args.model_path, "text_encoder_2", "pytorch_model.bin"
+        )
         text_enc_2_dict = torch.load(text_enc_2_path, map_location="cpu")
 
     # Convert the UNet model
     unet_state_dict = convert_unet_state_dict(unet_state_dict)
-    unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()}
+    unet_state_dict = {
+        "model.diffusion_model." + k: v for k, v in unet_state_dict.items()
+    }
 
     # Convert the VAE model
     vae_state_dict = convert_vae_state_dict(vae_state_dict)
@@ -387,19 +410,30 @@
     # Convert text encoder 1
     text_enc_dict = convert_openai_text_enc_state_dict(text_enc_dict)
-    text_enc_dict = {"conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()}
+    text_enc_dict = {
+        "conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()
+    }
 
     # Convert text encoder 2
     text_enc_2_dict = convert_openclip_text_enc_state_dict(text_enc_2_dict)
-    text_enc_2_dict = {"conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()}
+    text_enc_2_dict = {
+        "conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()
+    }
 
     # We call the `.T.contiguous()` to match what's done in
     # https://github.com/huggingface/diffusers/blob/84905ca7287876b925b6bf8e9bb92fec21c78764/src/diffusers/loaders/single_file_utils.py#L1085
-    text_enc_2_dict["conditioner.embedders.1.model.text_projection"] = text_enc_2_dict.pop(
+    text_enc_2_dict[
+        "conditioner.embedders.1.model.text_projection"
+    ] = text_enc_2_dict.pop(
         "conditioner.embedders.1.model.text_projection.weight"
     ).T.contiguous()
 
     # Put together new checkpoint
-    state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict, **text_enc_2_dict}
+    state_dict = {
+        **unet_state_dict,
+        **vae_state_dict,
+        **text_enc_dict,
+        **text_enc_2_dict,
+    }
 
     if args.half:
         state_dict = {k: v.half() for k, v in state_dict.items()}
@@ -410,8 +444,13 @@
         state_dict = {"state_dict": state_dict}
     torch.save(state_dict, args.checkpoint_path)
 
-    calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt"
-    convert_unet_calibrate_info_sdxl(args.model_path + "/calibrate_info.txt", calibrate_info_save_path)
+    calibrate_info_save_path = (
+        Path(args.checkpoint_path).parent
+        / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt"
+    )
+    convert_unet_calibrate_info_sdxl(
+        args.model_path + "/calibrate_info.txt", calibrate_info_save_path
+    )
 
 # def get_unet_state_dict(model_path):
@@ -497,4 +536,3 @@
 #     else:
 #         state_dict = {"state_dict": state_dict}
 #         torch.save(state_dict, checkpoint_path)
-
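The `.T.contiguous()` fix-up above is easy to misread, so here is a toy illustration of what it does to the checkpoint (shapes are illustrative): diffusers stores the projection as an `nn.Linear`-style weight, while the sgm-style key expects the transposed matrix, used as `x @ text_projection`.

```python
import torch

sd = {"conditioner.embedders.1.model.text_projection.weight": torch.randn(1280, 1280)}
# Rename the key and transpose the matrix in one move, as the converter does.
sd["conditioner.embedders.1.model.text_projection"] = sd.pop(
    "conditioner.embedders.1.model.text_projection.weight"
).T.contiguous()
```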
+ "--model_path", + default=None, + type=str, + required=True, + help="Path to the model to convert.", + ) + parser.add_argument( + "--checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the output model.", + ) + parser.add_argument( + "--half", action="store_true", help="Save weights in half precision." + ) + parser.add_argument( + "--use_safetensors", + action="store_true", + help="Save weights use safetensors, default is ckpt.", ) args = parser.parse_args() @@ -374,12 +393,16 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): if osp.exists(text_enc_2_path): text_enc_2_dict = load_file(text_enc_2_path, device="cpu") else: - text_enc_2_path = osp.join(args.model_path, "text_encoder_2", "pytorch_model.bin") + text_enc_2_path = osp.join( + args.model_path, "text_encoder_2", "pytorch_model.bin" + ) text_enc_2_dict = torch.load(text_enc_2_path, map_location="cpu") # Convert the UNet model unet_state_dict = convert_unet_state_dict(unet_state_dict) - unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + unet_state_dict = { + "model.diffusion_model." + k: v for k, v in unet_state_dict.items() + } # Convert the VAE model vae_state_dict = convert_vae_state_dict(vae_state_dict) @@ -387,19 +410,30 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): # Convert text encoder 1 text_enc_dict = convert_openai_text_enc_state_dict(text_enc_dict) - text_enc_dict = {"conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items() + } # Convert text encoder 2 text_enc_2_dict = convert_openclip_text_enc_state_dict(text_enc_2_dict) - text_enc_2_dict = {"conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()} + text_enc_2_dict = { + "conditioner.embedders.1.model." 
-hints_message = """
-
-  If you need Enterprise Level Support for your system or business, please send an email to
-  business@siliconflow.com.
-
-  Tell us about your use case, deployment scale, and requirements.
-
-  GitHub Issue:
-  https://github.com/siliconflow/onediff/issues
-
+  If you need Enterprise Level Support for your system or business, please send an email to
+  business@siliconflow.com.
+
+  Tell us about your use case, deployment scale, and requirements.
+
+  GitHub Issue:
+  https://github.com/siliconflow/onediff/issues
+
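The hunk is cut off here, but the new `textwrap.dedent` import signals where it is headed: the hints message is wrapped so it can be indented naturally in source while rendering flush-left. A sketch of the resulting idiom (the exact markup around the message is not shown in this excerpt):

```python
from textwrap import dedent

# dedent strips the common leading whitespace from every line of the block.
hints_message = dedent(
    """\
    If you need Enterprise Level Support for your system or business, please send an email to
    business@siliconflow.com.

    Tell us about your use case, deployment scale, and requirements.

    GitHub Issue:
    https://github.com/siliconflow/onediff/issues
    """
)
print(hints_message)
```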