diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index d979dd8ce..3cb681e33 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -15,11 +15,22 @@ env: REGION_ID: cn-beijing ACR_ORG: registry.cn-beijing.aliyuncs.com/oneflow COMFYUI_SRC_DIR: ComfyUI + WEBUI_SRC_DIR: stable-diffusion-webui + WEBUI_DEPENDENCIES_SUBDIR: repos SDXL_BASE: /share_nfs/hf_models/sd_xl_base_1.0.safetensors UNET_INT8: /share_nfs/hf_models/unet_int8 CONTROL_LORA_OPENPOSEXL2_RANK256: /share_nfs/hf_models/controlnet/control-lora-openposeXL2-rank256.safetensors SELENIUM_CONTAINER_NAME: selenium-test SELENIUM_IMAGE: standalone-chrome:119.0-chromedriver-119.0-grid-4.15.0-20231129 + + # For git repos required by webui + ASSETS_COMMIT_HASH: 6f7db241d2f8ba7457bac5ca9753331f0c266917 + STABLE_DIFFUSION_COMMIT_HASH: cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf + STABLE_DIFFUSION_XL_COMMIT_HASH: 45c443b316737a4ab6e40413d7794a7f5657c19f + K_DIFFUSION_COMMIT_HASH: ab527a9a6d347f364e3d185ba6d714e22d80cb3c + BLIP_COMMIT_HASH: 48211a1594f1321b00f14c9f7a5b4813144b2fb9 + + concurrency: group: sd-examples-${{ github.ref }} cancel-in-progress: true @@ -31,6 +42,7 @@ jobs: outputs: onediff_src_url: ${{ steps.upload_to_oss.outputs.onediff_src_url }} comfy_src_url: ${{ steps.upload_to_oss.outputs.comfy_src_url }} + webui_src_url: ${{ steps.upload_to_oss.outputs.webui_src_url }} steps: - name: Setup ossutil run: | @@ -46,6 +58,57 @@ jobs: with: repository: comfyanonymous/ComfyUI path: ComfyUI + - name: Checkout Stable Diffusion WebUI + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui + path: ${{ env.WEBUI_SRC_DIR }} + + # -------- The following are the dependencies required by webui -------- + - name: Checkout CLIP (dependency of webui) + uses: actions/checkout@v4 + with: + repository: openai/CLIP + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP + ref: d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 + - name: Checkout open clip (dependency of webui) + uses: actions/checkout@v4 + with: + repository: mlfoundations/open_clip + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip + ref: bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b + - name: Checkout ${{ env.WEBUI_SRC_DIR }}-assets (dependency of webui) + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui-assets + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets + ref: ${{ env.ASSETS_COMMIT_HASH }} + - name: Checkout stablediffusion (dependency of webui) + uses: actions/checkout@v4 + with: + repository: Stability-AI/stablediffusion + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai + ref: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + - name: Checkout generative-models (dependency of webui) + uses: actions/checkout@v4 + with: + repository: Stability-AI/generative-models + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models + ref: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + - name: Checkout k-diffusion (dependency of webui) + uses: actions/checkout@v4 + with: + repository: crowsonkb/k-diffusion + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion + ref: ${{ env.K_DIFFUSION_COMMIT_HASH }} + - name: Checkout BLIP (dependency of webui) + uses: actions/checkout@v4 + with: + repository: salesforce/BLIP + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP + ref: ${{ 
env.BLIP_COMMIT_HASH }} + # -------- The above are the dependencies required by webui -------- + - name: Pack src working-directory: onediff run: | @@ -58,15 +121,24 @@ jobs: git reset --hard git clean -f git archive --prefix ${{ env.COMFYUI_SRC_DIR }}/ --format zip HEAD > comfyui-src.zip + - name: Pack webui + working-directory: ${{ env.WEBUI_SRC_DIR }} + run: | + git reset --hard + git clean -f + zip -r webui-src.zip . - name: Upload src id: upload_to_oss run: | ONEDIFF_DST="oss://gh-src-cache/onediff/${{ github.sha }}/onediff-src.zip" COMFY_DST="oss://gh-src-cache/onediff/${{ github.sha }}/comfyui-src.zip" + WEBUI_DST="oss://gh-src-cache/onediff/${{ github.sha }}/webui-src.zip" ./ossutil64 cp --disable-ignore-error --update onediff/onediff-src.zip ${ONEDIFF_DST} ./ossutil64 cp --disable-ignore-error --update ComfyUI/comfyui-src.zip ${COMFY_DST} + ./ossutil64 cp --disable-ignore-error --update ${{ env.WEBUI_SRC_DIR }}/webui-src.zip ${WEBUI_DST} echo "onediff_src_url=${ONEDIFF_DST}" >> $GITHUB_OUTPUT echo "comfy_src_url=${COMFY_DST}" >> $GITHUB_OUTPUT + echo "webui_src_url=${WEBUI_DST}" >> $GITHUB_OUTPUT run-examples: name: " ${{ matrix.test-suite }} ${{ matrix.image }}" runs-on: [self-hosted, cuda] @@ -81,6 +153,7 @@ jobs: test-suite: - diffusers_examples - comfy + - webui steps: - name: Login to ACR with the AccessKey pair uses: aliyun/acr-login@v1 @@ -110,6 +183,11 @@ jobs: run: | $HOME/ossutil64 cp ${{ needs.upload_src.outputs.comfy_src_url }} . unzip -o $(basename ${{ needs.upload_src.outputs.comfy_src_url }}) + - name: Checkout WebUI vis OSS + if: matrix.test-suite == 'webui' && github.repository == 'siliconflow/onediff' + run: | + $HOME/ossutil64 cp ${{ needs.upload_src.outputs.webui_src_url }} . + unzip -o $(basename ${{ needs.upload_src.outputs.webui_src_url }}) -d ${{env.WEBUI_SRC_DIR}} - name: Checkout if: github.repository != 'siliconflow/onediff' uses: actions/checkout@v4 @@ -119,6 +197,12 @@ jobs: with: repository: comfyanonymous/ComfyUI path: ${{ env.COMFYUI_SRC_DIR }} + - name: Checkout Stable Diffusion WebUI + if: matrix.test-suite == 'webui' && github.repository != 'siliconflow/onediff' + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui + path: ${{ env.WEBUI_SRC_DIR }} - name: Clean docker containers run: | docker rm -f ${{ env.CONTAINER_NAME }} || true @@ -158,6 +242,25 @@ jobs: SDXL_BASE: ${{ env.SDXL_BASE }} UNET_INT8: ${{ env.UNET_INT8 }} SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + + - name: Setup docker for WebUI Test + if: matrix.test-suite == 'webui' + run: | + env + docker compose -f tests/webui-docker-compose.yml up -d + env: + CONTAINER_NAME: ${{ env.CONTAINER_NAME }} + MATRIX_IMAGE: ${{ matrix.image }} + WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }} + WEBUI_DEPENDENCIES_SUBDIR: ${{ env.WEBUI_DEPENDENCIES_SUBDIR }} + SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }} + SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }} + SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }} + STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }} + BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }} - run: nvidia-smi - run: nvidia-smi -L @@ -197,6 +300,7 @@ jobs: run_comfy_test "workflows/sdxl-unet-speedup-graph-saver.json" 200 run_comfy_test "workflows/sdxl-control-lora-speedup.json" 200 + 
run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/ipadapter_advanced.json" 200 run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache.json" 600 run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache-with-lora.json" 800 # run_comfy_test "workflows/text-to-video-speedup.json" 5000 @@ -234,7 +338,9 @@ jobs: - if: matrix.test-suite == 'diffusers_examples' run: docker exec -w /src/onediff/onediff_diffusers_extensions ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_turbo.py --compile true --base /share_nfs/hf_models/sdxl-turbo - if: matrix.test-suite == 'diffusers_examples' - run: docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py + run: | + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install scikit-image -i https://pypi.tuna.tsinghua.edu.cn/simple + docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py # - if: matrix.test-suite == 'diffusers_examples' # run: docker exec -w /src/onediff/onediff_diffusers_extensions -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_reuse_pipe.py --base /share_nfs/hf_models/stable-diffusion-xl-base-1.0 --new_base /share_nfs/hf_models/dataautogpt3-OpenDalleV1.1 - if: matrix.test-suite == 'diffusers_examples' && startsWith(matrix.image, 'onediff-pro') @@ -242,6 +348,67 @@ jobs: docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py --model /share_nfs/hf_models/stable-diffusion-v1-5-int8 --width 512 --height 512 --saved_image /src/onediff/output_enterprise_sd.png docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 tests/test_quantitative_quality.py + - name: Install Requirements for WebUI + if: matrix.test-suite == 'webui' + run: | + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install pytorch-lightning gradio==3.41.2 diskcache gitpython pytorch_lightning==1.9.4 scikit-image jsonmerge pillow-avif-plugin torchdiffeq torchsde clean-fid resize-right lark tomesd blendmodes facexlib opencv-python==4.8.0.74 piexif inflection ftfy regex tqdm pydantic==1.10.13 + + - name: Prepare environment for WebUI + if: matrix.test-suite == 'webui' + run: | + # hack code to print error msg for debugging + # docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sed -i '/except RuntimeError:/c\ except RuntimeError as e:\n print(f"Error occurred while running git command: {e}")' modules/launch_utils.py + docker exec -d ${{ env.CONTAINER_NAME }} mkdir /app/${{ env.WEBUI_SRC_DIR }}/.git /app/${{ env.WEBUI_SRC_DIR }}/openai + docker exec -d ${{ env.CONTAINER_NAME }} ln -s /share_nfs/onediff_ci/sd-webui/models/clips/clip-vit-large-patch14 /app/${{ env.WEBUI_SRC_DIR }}/openai/clip-vit-large-patch14 + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory /app/${{ env.WEBUI_SRC_DIR }} + for dir in $SAFE_DIRECTORIES; do + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory 
/app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir + echo /app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir + done + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -e venv_dir=- ${{ env.CONTAINER_NAME }} sh -c "bash webui.sh -f --exit --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models" + + # env: + # INDEX_URL: "https://pypi.tuna.tsinghua.edu.cn/simple" + # CLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP" + # OPENCLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip" + # ASSETS_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets" + # STABLE_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai" + # STABLE_DIFFUSION_XL_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models" + # K_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion" + # BLIP_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP" + + # ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }} + # STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + # STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + # K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }} + # BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }} + + # SAFE_DIRECTORIES: | + # CLIP + # open_clip + # stable-diffusion-webui-assets + # stable-diffusion-stability-ai + # generative-models + # k-diffusion + # BLIP + + - name: Start WebUI Web Service + if: matrix.test-suite == 'webui' + run: | + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sh -c "python3 webui.py --port 7860 --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models --skip-version-check > /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log 2>&1" + sleep 60 + + - run: docker exec ${{ env.CONTAINER_NAME }} ps aux + + - if: matrix.test-suite == 'webui' + run: docker exec ${{ env.CONTAINER_NAME }} python3 -m pytest -v -s tests/sd-webui/test_api.py + + - name: Show WebUI Log + if: matrix.test-suite == 'webui' + run: docker exec ${{ env.CONTAINER_NAME }} cat /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log + - name: Shutdown docker for ComfyUI Test if: matrix.test-suite == 'comfy' run: | @@ -268,3 +435,16 @@ jobs: SDXL_BASE: ${{ env.SDXL_BASE }} UNET_INT8: ${{ env.UNET_INT8 }} SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + + - name: Shutdown docker for WebUI Test + if: matrix.test-suite == 'webui' + run: | + docker compose -f tests/webui-docker-compose.yml down + env: + CONTAINER_NAME: ${{ env.CONTAINER_NAME }} + ACR_ORG: ${{ env.ACR_ORG }} + MATRIX_IMAGE: ${{ matrix.image }} + WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }} + SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }} + SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }} + SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} diff --git a/.gitignore b/.gitignore index e68a35b10..677317453 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,6 @@ unet_graphs # onediff_comfy_nodes *.pt *.graph + +# onediff_sd_webui_extensions +onediff_sd_webui_extensions/compiled_caches/ diff --git a/benchmarks/image_to_video.py 
b/benchmarks/image_to_video.py index 730ec752b..fcc9e19cd 100644 --- a/benchmarks/image_to_video.py +++ b/benchmarks/image_to_video.py @@ -41,7 +41,7 @@ import oneflow as flow import torch -from onediffx import compile_pipe, compile_options +from onediffx import compile_pipe, OneflowCompileOptions from diffusers.utils import load_image, export_to_video @@ -189,7 +189,8 @@ def main(): # especially for 40xx series cards. # So here by partially disabling the half accumulation in MHA partially, # we can get a good balance. - compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = ( + compile_options = OneflowCompileOptions() + compile_options.attention_allow_half_precision_score_accumulation_max_m = ( args.attention_fp16_score_accum_max_m ) pipe = compile_pipe(pipe, options=compile_options) diff --git a/benchmarks/patch_stable_cascade_of.py b/benchmarks/patch_stable_cascade_of.py index 8f388111a..454a17344 100644 --- a/benchmarks/patch_stable_cascade_of.py +++ b/benchmarks/patch_stable_cascade_of.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_of = transform_mgr.transform_package("diffusers") StableCascadeUnet_OF_CLS = ( @@ -120,7 +120,7 @@ def forward( ) # torch2oflow_class_map.update({StableCascadeUnet: StableCascadeUnetOflow}) -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from contextlib import contextmanager diff --git a/benchmarks/run_text_to_image_benchmark.sh b/benchmarks/run_text_to_image_benchmark.sh index 273a419b9..512aab42f 100755 --- a/benchmarks/run_text_to_image_benchmark.sh +++ b/benchmarks/run_text_to_image_benchmark.sh @@ -111,6 +111,22 @@ benchmark_sd_model sd15 ${SD15_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 benchmark_sd_model sd21 ${SD21_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 benchmark_sd_model sdxl ${SDXL_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 +benchmark_sd_model_with_throughput() { + model_path=$1 + warmups=$2 + compiler=$3 + echo "Run ${model_path} with throughput test at 1024x1024..." 
+ script_output=$(python3 ${SCRIPT_DIR}/text_to_image.py --model ${model_path} --variant fp16 --warmups ${warmups} --compiler ${compiler} --height 1024 --width 1024 --throughput | tee /dev/tty) + + throughput=$(echo "${script_output}" | grep -oP '(?<=Throughput without base cost: )\d+\.\d+') + inference_time_eq=$(echo "${script_output}" | grep -oP 'Model: Inference Time = .+') + + BENCHMARK_RESULT_TEXT="${BENCHMARK_RESULT_TEXT}| ${model_path} | 1024x1024 | N/A | N/A | N/A | N/A | Throughput without base cost: ${throughput} | ${inference_time_eq} |\n" +} + +benchmark_sd_model_with_throughput ${SD15_MODEL_PATH} ${WARMUPS} ${COMPILER} +benchmark_sd_model_with_throughput ${SDXL_MODEL_PATH} ${WARMUPS} ${COMPILER} + if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ x"${COMPILER}" == x"oneflow" ]; then benchmark_sd_model sdxl_quant ${SDXL_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 fi @@ -119,4 +135,5 @@ if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ ${BENCHMARK_DEEP_CACHE_MODEL} != 0 ] & benchmark_sd_model sdxl_deepcache_quant ${SDXL_DEEP_CACHE_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 fi +echo -e "\nBenchmark Results:" echo -e "${BENCHMARK_RESULT_TEXT}" > ${OUTPUT_FILE} diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py index a8c97c510..539e73f74 100644 --- a/benchmarks/text_to_image.py +++ b/benchmarks/text_to_image.py @@ -6,9 +6,9 @@ CONTROLNET = None STEPS = 30 PROMPT = "best quality, realistic, unreal engine, 4K, a beautiful girl" -NEGATIVE_PROMPT = None -SEED = None -WARMUPS = 3 +NEGATIVE_PROMPT = "" +SEED = 333 +WARMUPS = 1 BATCH = 1 HEIGHT = None WIDTH = None @@ -19,6 +19,8 @@ CACHE_INTERVAL = 3 CACHE_LAYER_ID = 0 CACHE_BLOCK_ID = 0 +COMPILER = "oneflow" +COMPILER_CONFIG = None import os import importlib @@ -27,6 +29,8 @@ import time import json import torch +import matplotlib.pyplot as plt +import numpy as np from PIL import Image, ImageDraw from diffusers.utils import load_image @@ -56,20 +60,30 @@ def parse_args(): parser.add_argument("--input-image", type=str, default=INPUT_IMAGE) parser.add_argument("--control-image", type=str, default=CONTROL_IMAGE) parser.add_argument("--output-image", type=str, default=OUTPUT_IMAGE) + parser.add_argument("--throughput", action="store_true") parser.add_argument("--deepcache", action="store_true") parser.add_argument( "--compiler", type=str, - default="oneflow", + default=COMPILER, choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"], ) + parser.add_argument( + "--compiler-config", + type=str, + default=COMPILER_CONFIG, + ) return parser.parse_args() +args = parse_args() + def load_pipe( pipeline_cls, model_name, variant=None, + dtype=torch.float16, + device="cuda", custom_pipeline=None, scheduler=None, lora=None, @@ -80,31 +94,34 @@ def load_pipe( extra_kwargs["custom_pipeline"] = custom_pipeline if variant is not None: extra_kwargs["variant"] = variant + if dtype is not None: + extra_kwargs["torch_dtype"] = dtype if controlnet is not None: from diffusers import ControlNetModel controlnet = ControlNetModel.from_pretrained( - controlnet, torch_dtype=torch.float16, + controlnet, torch_dtype=dtype, ) extra_kwargs["controlnet"] = controlnet if os.path.exists(os.path.join(model_name, "calibrate_info.txt")): from onediff.quantization import QuantPipeline pipe = QuantPipeline.from_quantized( - pipeline_cls, model_name, torch_dtype=torch.float16, **extra_kwargs + pipeline_cls, model_name, **extra_kwargs ) else: pipe = pipeline_cls.from_pretrained( - model_name, torch_dtype=torch.float16, **extra_kwargs + 
model_name, **extra_kwargs ) - if scheduler is not None: + if scheduler is not None and scheduler != "none": scheduler_cls = getattr(importlib.import_module("diffusers"), scheduler) pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config) if lora is not None: pipe.load_lora_weights(lora) pipe.fuse_lora() pipe.safety_checker = None - pipe.to(torch.device("cuda")) + if device is not None: + pipe.to(torch.device(device)) return pipe @@ -134,8 +151,52 @@ def callback_on_step_end(self, pipe, i, t, callback_kwargs={}): return callback_kwargs +def calculate_inference_time_and_throughput(height, width, n_steps, model): + start_time = time.time() + model(prompt=args.prompt, height=height, width=width, num_inference_steps=n_steps) + end_time = time.time() + inference_time = end_time - start_time + # pixels_processed = height * width * n_steps + # throughput = pixels_processed / inference_time + throughput = n_steps / inference_time + return inference_time, throughput + + +def generate_data_and_fit_model(model, steps_range): + height, width = 1024, 1024 + data = {"steps": [], "inference_time": [], "throughput": []} + + for n_steps in steps_range: + inference_time, throughput = calculate_inference_time_and_throughput(height, width, n_steps, model) + data["steps"].append(n_steps) + data["inference_time"].append(inference_time) + data["throughput"].append(throughput) + print(f"Steps: {n_steps}, Inference Time: {inference_time:.2f} seconds, Throughput: {throughput:.2f} steps/s") + + average_throughput = np.mean(data["throughput"]) + print(f"Average Throughput: {average_throughput:.2f} steps/s") + + coefficients = np.polyfit(data["steps"], data["inference_time"], 1) + base_time_without_base_cost = 1 / coefficients[0] + print(f"Throughput without base cost: {base_time_without_base_cost:.2f} steps/s") + return data, coefficients + + +def plot_data_and_model(data, coefficients): + plt.figure(figsize=(10, 5)) + plt.scatter(data["steps"], data["inference_time"], color='blue') + plt.plot(data["steps"], np.polyval(coefficients, data["steps"]), color='red') + plt.title("Inference Time vs. 
Steps") + plt.xlabel("Steps") + plt.ylabel("Inference Time (seconds)") + plt.grid(True) + # plt.savefig("output.png") + plt.show() + + print(f"Model: Inference Time = {coefficients[0]:.2f} * Steps + {coefficients[1]:.2f}") + + def main(): - args = parse_args() if args.input_image is None: if args.deepcache: from onediffx.deep_cache import StableDiffusionXLPipeline as pipeline_cls @@ -154,18 +215,32 @@ def main(): controlnet=args.controlnet, ) - height = args.height or pipe.unet.config.sample_size * pipe.vae_scale_factor - width = args.width or pipe.unet.config.sample_size * pipe.vae_scale_factor + core_net = None + if core_net is None: + core_net = getattr(pipe, "unet", None) + if core_net is None: + core_net = getattr(pipe, "transformer", None) + height = args.height or core_net.config.sample_size * pipe.vae_scale_factor + width = args.width or core_net.config.sample_size * pipe.vae_scale_factor if args.compiler == "none": pass elif args.compiler == "oneflow": pipe = compile_pipe(pipe) elif args.compiler == "nexfort": - pipe = compile_pipe(pipe, backend="nexfort") + if args.compiler_config is not None: + # config with dict + options = json.loads(args.compiler_config) + else: + # config with string + options = '{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}' + pipe = compile_pipe(pipe, backend="nexfort", options=options, fuse_qkv_projections=True) elif args.compiler in ("compile", "compile-max-autotune"): mode = "max-autotune" if args.compiler == "compile-max-autotune" else None - pipe.unet = torch.compile(pipe.unet, mode=mode) + if hasattr(pipe, "unet"): + pipe.unet = torch.compile(pipe.unet, mode=mode) + if hasattr(pipe, "transformer"): + pipe.transformer = torch.compile(pipe.transformer, mode=mode) if hasattr(pipe, "controlnet"): pipe.controlnet = torch.compile(pipe.controlnet, mode=mode) pipe.vae = torch.compile(pipe.vae, mode=mode) @@ -199,7 +274,6 @@ def get_kwarg_inputs(): negative_prompt=args.negative_prompt, height=height, width=width, - num_inference_steps=args.steps, num_images_per_prompt=args.batch, generator=None if args.seed is None @@ -210,6 +284,8 @@ def get_kwarg_inputs(): else json.loads(args.extra_call_kwargs) ), ) + if args.steps is not None: + kwarg_inputs["num_inference_steps"] = args.steps if input_image is not None: kwarg_inputs["image"] = input_image if control_image is not None: @@ -227,10 +303,15 @@ def get_kwarg_inputs(): # The initial calls will trigger compilation and might be very slow. # After that, it should be very fast. if args.warmups > 0: + begin = time.time() + print("=======================================") print("Begin warmup") for _ in range(args.warmups): pipe(**get_kwarg_inputs()) + end = time.time() print("End warmup") + print(f"Warmup time: {end - begin:.3f}s") + print("=======================================") # Let"s see it! # Note: Progress bar might work incorrectly due to the async nature of CUDA. 
@@ -255,7 +336,7 @@ def get_kwarg_inputs(): cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024 else: cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3) - print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB") + print(f"Max used CUDA memory : {cuda_mem_after_used:.3f}GiB") print("=======================================") if args.output_image is not None: @@ -263,6 +344,11 @@ def get_kwarg_inputs(): else: print("Please set `--output-image` to save the output image") + if args.throughput: + steps_range = range(1, 100, 1) + data, coefficients = generate_data_and_fit_model(pipe, steps_range) + plot_data_and_model(data, coefficients) + if __name__ == "__main__": main() diff --git a/onediff_comfy_nodes/extras_nodes/nodes_compare.py b/onediff_comfy_nodes/extras_nodes/nodes_compare.py index 4f4461d9b..a06fa9edb 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_compare.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_compare.py @@ -5,7 +5,7 @@ import folder_paths import numpy as np import oneflow as flow -from onediff.infer_compiler.transform.builtin_transform import torch2oflow +from onediff.infer_compiler.backends.oneflow.transform.builtin_transform import torch2oflow from PIL import Image try: @@ -148,8 +148,9 @@ def save_images( ) results = list() for image1, image2 in zip(images1, images2): + # image diff - image = image1 - image2 + image = image1.cuda() - image2.cuda() i = 255.0 * image.cpu().numpy() img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8)) diff --git a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py index ca22873ee..9daa567e6 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py @@ -7,7 +7,7 @@ from comfy import model_management from comfy.cli_args import args -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from ..modules.oneflow.config import ONEDIFF_QUANTIZED_OPTIMIZED_MODELS from ..modules.oneflow.hijack_animatediff import animatediff_hijacker @@ -17,6 +17,7 @@ from ..modules.oneflow.hijack_samplers import samplers_hijack from ..modules.oneflow.hijack_comfyui_instantid import comfyui_instantid_hijacker from ..modules.oneflow.hijack_model_patcher import model_patch_hijacker +from ..modules.oneflow.hijack_utils import comfy_utils_hijack from ..modules.oneflow import BasicOneFlowBoosterExecutor from ..modules.oneflow import DeepcacheBoosterExecutor from ..modules.oneflow import PatchBoosterExecutor @@ -35,6 +36,7 @@ ipadapter_plus_hijacker.hijack() comfyui_instantid_hijacker.hijack() model_patch_hijacker.hijack() +comfy_utils_hijack.hijack() import comfy_extras.nodes_video_model from nodes import CheckpointLoaderSimple diff --git a/onediff_comfy_nodes/modules/oneflow/booster_basic.py b/onediff_comfy_nodes/modules/oneflow/booster_basic.py index 608462861..f35d4f27d 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_basic.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_basic.py @@ -7,7 +7,7 @@ from comfy.model_patcher import ModelPatcher from comfy.sd import VAE from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor from .onediff_controlnet import 
OneDiffControlLora @@ -47,6 +47,7 @@ def _(self, model: ModelPatcher, ckpt_name: Optional[str] = None, **kwargs): ) set_compiled_options(compiled_model, graph_file) + return model @execute.register(ControlNet) diff --git a/onediff_comfy_nodes/modules/oneflow/booster_patch.py b/onediff_comfy_nodes/modules/oneflow/booster_patch.py index 6bff76ba9..b12e1a042 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_patch.py @@ -2,7 +2,7 @@ from functools import singledispatchmethod from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor diff --git a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py index 7254ae0b3..f4b50d6e4 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py @@ -8,7 +8,7 @@ from comfy.controlnet import ControlNet from comfy.model_patcher import ModelPatcher from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from onediff_quant.quantization import QuantizationConfig from onediff_quant.quantization.module_operations import get_sub_module from onediff_quant.quantization.quantize_calibrators import ( diff --git a/onediff_comfy_nodes/modules/oneflow/config.py b/onediff_comfy_nodes/modules/oneflow/config.py index 353c4f024..8a6494e31 100644 --- a/onediff_comfy_nodes/modules/oneflow/config.py +++ b/onediff_comfy_nodes/modules/oneflow/config.py @@ -3,7 +3,7 @@ import sys from pathlib import Path -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version # Set up paths ONEDIFF_QUANTIZED_OPTIMIZED_MODELS = "onediff_quant" diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py index 167789792..d6340640f 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py @@ -4,8 +4,8 @@ """ import os -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py index 74f8dd9c4..3bbc579dc 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py @@ -1,7 +1,7 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/motion_module_ad.py import oneflow as torch from einops import repeat -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt @@ 
-124,7 +124,7 @@ def forward( ) # import torch as torch_pt -# from onediff.infer_compiler.transform import torch2oflow +# from onediff.infer_compiler.backends.oneflow.transform import torch2oflow # @torch2oflow.register(TemporalTransformer3DModel_PT_CLS) # def _(mod, verbose=False): diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py index 720c5ab2a..ea201069b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py @@ -2,7 +2,7 @@ import oneflow as flow from einops import rearrange from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from oneflow.nn.functional import group_norm from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py index d1b4f3885..1fafec133 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py @@ -1,6 +1,6 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/utils_motion.py import oneflow as torch -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py index ec2a1903e..d18438434 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py index 37d11f083..b9da6376b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py index 9be19105d..588fe7971 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py @@ -1,11 +1,11 @@ 
+import torch from register_comfy.CrossAttentionPatch import Attn2Replace, ipadapter_attention -from onediff.infer_compiler.transform import torch2oflow +from comfy import model_management +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from ..utils.booster_utils import clear_deployable_module_cache_and_unbind from ..patch_management import PatchType, create_patch_executor -# from onediff.infer_compiler.utils.cost_util import cost_time -# @cost_time(debug=True, message="set_model_patch_replace_v2") def set_model_patch_replace_v2(org_fn, model, patch_kwargs, key): diff_model = model.model.diffusion_model cache_patch_executor = create_patch_executor(PatchType.CachedCrossAttentionPatch) @@ -41,6 +41,12 @@ def split_patch_kwargs(patch_kwargs): else: split2dict[k] = v + # patch for weight + weight = split1dict["weight"] + if isinstance(weight, (int, float)): + weight = torch.tensor([weight]) + split1dict["weight"] = weight.to(model_management.get_torch_device()) + return split1dict, split2dict new_patch_kwargs, patch_kwargs = split_patch_kwargs(patch_kwargs) diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_utils.py b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py new file mode 100644 index 000000000..4a4f25c5a --- /dev/null +++ b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py @@ -0,0 +1,28 @@ +"""hijack ComfyUI/comfy/utils.py""" +import torch +from comfy.utils import copy_to_param +from ..sd_hijack_utils import Hijacker + + +def copy_to_param_of(org_fn, obj, attr, value): + # inplace update tensor instead of replacing it + attrs = attr.split(".") + for name in attrs[:-1]: + obj = getattr(obj, name) + prev = getattr(obj, attrs[-1]) + + if prev.data.dtype == torch.int8 and prev.data.dtype != value.dtype: + return + + prev.data.copy_(value) + + +def cond_func(orig_func, *args, **kwargs): + return True + + +comfy_utils_hijack = Hijacker() + +comfy_utils_hijack.register( + orig_func=copy_to_param, sub_func=copy_to_param_of, cond_func=cond_func +) diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py index e1c91b7ba..32b668121 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py @@ -7,8 +7,8 @@ from nodes import * # must imported before import comfy -from onediff.infer_compiler.transform import register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.transform import register +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from .attention import CrossAttention as CrossAttention1f from .attention import SpatialTransformer as SpatialTransformer1f diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py index 3eb09d9fb..27bf9165a 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py @@ -8,7 +8,7 @@ import oneflow as torch import oneflow.nn as nn from einops import rearrange, repeat -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import 
proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py index 854eb9f85..e320170c0 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py @@ -4,7 +4,7 @@ import oneflow as torch import oneflow.nn as nn import oneflow.nn.functional as F -from onediff.infer_compiler.transform import proxy_class +from onediff.infer_compiler.backends.oneflow.transform import proxy_class def Normalize(in_channels, num_groups=32): diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py index cf6e54553..638b4b3cd 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py @@ -1,5 +1,5 @@ import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_comfy = transform_mgr.transform_package("comfy") proxy_ops = transformed_comfy.ops diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py index 88cc98469..b8469004b 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py @@ -4,7 +4,7 @@ import oneflow as th # 'th' is the way ComfyUI name the torch import oneflow.nn.functional as F from einops import rearrange -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py index 86e822739..14f1a26d8 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py @@ -1,7 +1,7 @@ # ComfyUI/comfy/ldm/modules/diffusionmodules/model.py import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py index d05e8acb5..48f0de2a6 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py @@ -1,6 +1,6 @@ import onediff_quant import oneflow as flow -from onediff.infer_compiler.transform 
import register +from onediff.infer_compiler.backends.oneflow.transform import register torch2oflow_class_map = { onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule, diff --git a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py index 5b8143605..80a242f2d 100644 --- a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py @@ -1,6 +1,6 @@ from register_comfy.CrossAttentionPatch import is_crossAttention_patch -from onediff.infer_compiler.utils import online_quantization_utils +from onediff.infer_compiler.backends.oneflow import online_quantization_utils from .patch_executor import PatchExecutorBase diff --git a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py index ee42cc171..a70246405 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py @@ -5,8 +5,8 @@ from comfy.model_base import BaseModel, SVD_img2vid from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule -from onediff.infer_compiler.utils import set_boolean_env_var +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule +from onediff.utils import set_boolean_env_var from ..patch_management import PatchType, create_patch_executor diff --git a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py index 96844fb2e..34acfe3b0 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py @@ -4,8 +4,8 @@ from comfy import model_management from folder_paths import get_input_directory # onediff -from onediff.infer_compiler import CompileOptions, oneflow_compile -from onediff.infer_compiler.transform import torch2oflow +from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from onediff.optimization.quant_optimizer import quantize_model # onediff_comfy_nodes @@ -18,9 +18,9 @@ def compoile_unet(diffusion_model, graph_file): print(f" OneDiffCheckpointLoaderSimple load_checkpoint file_path {graph_file}") - compile_options = CompileOptions() - compile_options.oneflow.graph_file = graph_file - compile_options.oneflow.graph_file_device = load_device + compile_options = OneflowCompileOptions() + compile_options.graph_file = graph_file + compile_options.graph_file_device = load_device diffusion_model = oneflow_compile(diffusion_model, options=compile_options) return diffusion_model diff --git a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py index be22c7e64..6441673d6 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py @@ -33,7 +33,7 @@ def __init__( graph_device=None, ): from onediff.infer_compiler import ( - CompileOptions, + OneflowCompileOptions, oneflow_compile, DeployableModule, ) @@ -49,10 +49,10 @@ def __init__( "diffusion_model" ] = self.model.diffusion_model else: - options = CompileOptions() - options.oneflow.use_graph = use_graph - 
options.oneflow.graph_file = graph_path - options.oneflow.graph_file_device = graph_device + options = OneflowCompileOptions() + options.use_graph = use_graph + options.graph_file = graph_path + options.graph_file_device = graph_device self.model.__dict__["_modules"]["diffusion_model"] = oneflow_compile( self.model.diffusion_model, options=options ) @@ -506,7 +506,7 @@ def __init__( gen_compile_options=None, ): from onediff.infer_compiler import ( - CompileOptions, + OneflowCompileOptions, oneflow_compile, DeployableModule, ) @@ -525,14 +525,14 @@ def __init__( self.model.diffusion_model, cache_layer_id, cache_block_id ) if use_graph: - gen_compile_options = gen_compile_options or (lambda x: CompileOptions()) + gen_compile_options = gen_compile_options or (lambda x: OneflowCompileOptions()) compile_options = gen_compile_options(self.deep_cache_unet) - compile_options.oneflow.use_graph = use_graph + compile_options.use_graph = use_graph self.deep_cache_unet = oneflow_compile( self.deep_cache_unet, options=compile_options, ) compile_options = gen_compile_options(self.fast_deep_cache_unet) - compile_options.oneflow.use_graph = use_graph + compile_options.use_graph = use_graph self.fast_deep_cache_unet = oneflow_compile( self.fast_deep_cache_unet, options=compile_options, ) diff --git a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py index c56ef3244..2c702e383 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py @@ -3,7 +3,7 @@ import folder_paths import torch from comfy import model_management -from onediff.infer_compiler import CompileOptions, oneflow_compile +from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile from ..config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS from .graph_path import generate_graph_path @@ -49,11 +49,11 @@ def onediff_load_quant_checkpoint_advanced( ) if vae_speedup == "enable": - compile_options = CompileOptions() - compile_options.oneflow.graph_file = generate_graph_path( + compile_options = OneflowCompileOptions() + compile_options.graph_file = generate_graph_path( ckpt_name, vae.first_stage_model ) - compile_options.oneflow.graph_file_device = model_management.get_torch_device() + compile_options.graph_file_device = model_management.get_torch_device() vae.first_stage_model = oneflow_compile( vae.first_stage_model, options=compile_options ) diff --git a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py index 048a0312d..a14b15603 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py @@ -10,8 +10,7 @@ from nodes import KSampler, VAEDecode from onediff.infer_compiler import oneflow_compile # onediff -from onediff.infer_compiler.utils.module_operations import (get_sub_module, - modify_sub_module) +from onediff.torch_utils.module_operations import (get_sub_module, modify_sub_module) from onediff_quant import Quantizer # onediff_quant from onediff_quant.utils import (find_quantizable_modules, get_quantize_module, diff --git a/onediff_diffusers_extensions/README.md b/onediff_diffusers_extensions/README.md index 7ef564f6c..1704aeb63 100644 --- a/onediff_diffusers_extensions/README.md +++ b/onediff_diffusers_extensions/README.md @@ -208,7 +208,7 @@ pipe = 
StableVideoDiffusionPipeline.from_pretrained( ) pipe.to("cuda") -compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0 +compile_options.attention_allow_half_precision_score_accumulation_max_m = 0 pipe = compile_pipe(pipe, options=compile_options) input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true") diff --git a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py index cbd7dc81f..ebdc5de2f 100644 --- a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py +++ b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py @@ -18,7 +18,7 @@ from diffusers import EulerDiscreteScheduler from diffusers import utils -from onediff.infer_compiler.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt _MODEL_ID = "stabilityai/stable-diffusion-2" diff --git a/onediff_diffusers_extensions/examples/pixart_alpha/README.md b/onediff_diffusers_extensions/examples/pixart_alpha/README.md new file mode 100644 index 000000000..a63891526 --- /dev/null +++ b/onediff_diffusers_extensions/examples/pixart_alpha/README.md @@ -0,0 +1,53 @@ +# Run PixArt alpha (with nexfort backend) +## Environment setup +### Set up onediff +https://github.com/siliconflow/onediff?tab=readme-ov-file#installation + +### Set up nexfort backend +https://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort + +### Set up PixArt alpha +HF model: https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS + +HF pipeline: https://huggingface.co/docs/diffusers/main/en/api/pipelines/pixart + +## Run +model_id_or_path_to_PixArt-XL-2-1024-MS is the model id or model path of pixart alpha, such as `/data/hf_models/PixArt-XL-2-1024-MS/` + +### Go to the onediff folder +``` +cd onediff +``` + +### Run 1024*1024 without compile(the original pytorch HF diffusers pipeline) +``` +python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler none --output-image ./pixart_alpha.png +``` + +### Run 1024*1024 with compile +``` +python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler nexfort --output-image ./pixart_alpha.png +``` + +## Performance comparation +### nexfort compile config +- compiler-config default is `{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}` in `/benchmarks/text_to_image.py` + - setting `--compiler-config '{"mode": "max-autotune", "memory_format": "channels_last"}'` will reduce compilation time to 57.863s and just slightly reduce the performance +- fuse_qkv_projections: True + +### Metric +| Metric | NVIDIA A100-PCIE-40GB (1024 * 1024) | +| ------------------------------------------------ | ----------------------------------- | +| Data update date(yyyy-mm-dd) | 2024-05-23 | +| PyTorch iteration speed | 8.623it/s | +| OneDiff iteration speed | 10.743it/s(+24.58%) | +| PyTorch E2E time | 2.568s | +| OneDiff E2E time | 1.992s(-22.4%) | +| PyTorch Max Mem Used | 14.445GiB | +| OneDiff Max Mem Used | 13.855GiB | +| PyTorch Warmup with Run time | 4.100s | +| OneDiff Warmup with Compilation time1 | 115.309s | +| OneDiff Warmup with Cache time | TODO | + + 1 OneDiff Warmup with Compilation time is tested on Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz. 
Note this is just for reference, and it varies a lot on different CPU. + diff --git a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py index a6f323998..2081fbaa6 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py @@ -6,7 +6,7 @@ import torch.nn as nn # oneflow_compile should be imported before importing any diffusers -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions def parse_args(): @@ -110,7 +110,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py index e5b150052..e42b47071 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py @@ -2,7 +2,7 @@ import time import argparse -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions import torch import torch.nn as nn @@ -92,7 +92,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py index 859b2e491..5a164d239 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py @@ -6,7 +6,7 @@ import torch.nn as nn # oneflow_compile should be imported before importing any diffusers -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions def parse_args(): @@ -90,7 +90,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py index aa5d86058..06d16c81f 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py @@ -2,7 +2,7 @@ from pathlib import Path from diffusers import DiffusionPipeline from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign try: from onediffx.lora import load_and_fuse_lora, unfuse_lora, update_graph_with_constant_folding_info diff --git 
a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py index ae9488221..0da27858f 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py @@ -7,7 +7,7 @@ import torch import oneflow as flow -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions from diffusers import DiffusionPipeline parser = argparse.ArgumentParser() @@ -47,7 +47,8 @@ # Compile unet and vae print("unet and vae is compiled to oneflow.") -compile_options.oneflow.max_cached_graph_size = cmd_args.num_dynamic_input_size +compile_options = OneflowCompileOptions() +compile_options.max_cached_graph_size = cmd_args.num_dynamic_input_size base.unet = oneflow_compile(base.unet, options=compile_options) base.vae.decoder = oneflow_compile(base.vae.decoder, options=compile_options) diff --git a/onediff_diffusers_extensions/onediffx/__init__.py b/onediff_diffusers_extensions/onediffx/__init__.py index 2da48e8f8..532dad12c 100644 --- a/onediff_diffusers_extensions/onediffx/__init__.py +++ b/onediff_diffusers_extensions/onediffx/__init__.py @@ -1,5 +1,5 @@ __version__ = "1.1.0.dev1" -from onediff.infer_compiler import compile_options from .compilers.diffusion_pipeline_compiler import compile_pipe, save_pipe, load_pipe +from onediff.infer_compiler import OneflowCompileOptions -__all__ = ["compile_pipe", "compile_options", "save_pipe", "load_pipe"] +__all__ = ["compile_pipe", "save_pipe", "load_pipe", "OneflowCompileOptions"] diff --git a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py index 3307991e3..dbd784367 100644 --- a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py +++ b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py @@ -1,7 +1,7 @@ import os import torch from onediff.infer_compiler import compile, DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger def _recursive_getattr(obj, attr, default=None): @@ -29,11 +29,11 @@ def _recursive_setattr(obj, attr, value): "fast_unet", # for deepcache "prior", # for StableCascadePriorPipeline "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline "vqgan.down_blocks", # for StableCascadeDecoderPipeline "vqgan.up_blocks", # for StableCascadeDecoderPipeline "vae.decoder", "vae.encoder", - "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline ] @@ -52,8 +52,20 @@ def _filter_parts(ignores=()): def compile_pipe( - pipe, *, backend="oneflow", options=None, ignores=(), + pipe, *, backend="oneflow", options=None, ignores=(), fuse_qkv_projections=False, ): + if fuse_qkv_projections: + pipe = fuse_qkv_projections_in_pipe(pipe) + + if backend == "nexfort" and isinstance(options, str): + import json + options = json.loads(options) + + if backend == "nexfort" and options is not None and "memory_format" in options: + memory_format = getattr(torch, options["memory_format"]) + pipe = convert_pipe_to_memory_format(pipe, ignores=ignores, memory_format=memory_format) + del options["memory_format"] + # To fix the bug of graph load of vae. 
Please refer to: https://github.com/siliconflow/onediff/issues/452 if ( hasattr(pipe, "upcast_vae") @@ -82,6 +94,33 @@ def compile_pipe( return pipe +def fuse_qkv_projections_in_pipe(pipe): + if hasattr(pipe, "fuse_qkv_projections"): + pipe.fuse_qkv_projections() + return pipe + + +def convert_pipe_to_memory_format(pipe, *, ignores=(), memory_format=torch.preserve_format): + from nexfort.utils.attributes import multi_recursive_apply + from nexfort.utils.memory_format import apply_memory_format + import functools + if memory_format == torch.preserve_format: + return pipe + + parts = [ + "unet", + "controlnet", + "fast_unet", # for deepcache + "prior", # for StableCascadePriorPipeline + "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline + "vqgan", # for StableCascadeDecoderPipeline + "vae", + ] + multi_recursive_apply( + pipe, parts, functools.partial(apply_memory_format, memory_format=memory_format), ignores=ignores, verbose=True + ) + return pipe def save_pipe(pipe, dir="cached_pipe", *, ignores=(), overwrite=True): if not os.path.exists(dir): diff --git a/onediff_diffusers_extensions/onediffx/lora/__init__.py b/onediff_diffusers_extensions/onediffx/lora/__init__.py index 24b78f93d..5d99001bc 100644 --- a/onediff_diffusers_extensions/onediffx/lora/__init__.py +++ b/onediff_diffusers_extensions/onediffx/lora/__init__.py @@ -6,4 +6,4 @@ get_active_adapters, ) -from onediff.infer_compiler.utils.param_utils import update_graph_with_constant_folding_info +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_with_constant_folding_info diff --git a/onediff_diffusers_extensions/onediffx/lora/lora.py b/onediff_diffusers_extensions/onediffx/lora/lora.py index f5bb290b4..8e7896094 100644 --- a/onediff_diffusers_extensions/onediffx/lora/lora.py +++ b/onediff_diffusers_extensions/onediffx/lora/lora.py @@ -5,7 +5,7 @@ import torch -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger import diffusers from diffusers.loaders import LoraLoaderMixin diff --git a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py index a0bdf76d0..df8f17ebe 100644 --- a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py +++ b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py @@ -19,7 +19,7 @@ from diffusers.utils import is_accelerate_available from diffusers.models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from .utils import fuse_lora, get_adapter_names diff --git a/onediff_diffusers_extensions/onediffx/lora/unet.py b/onediff_diffusers_extensions/onediffx/lora/unet.py index cca033aa1..98834eeaa 100644 --- a/onediff_diffusers_extensions/onediffx/lora/unet.py +++ b/onediff_diffusers_extensions/onediffx/lora/unet.py @@ -4,7 +4,7 @@ import torch from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from diffusers.models.lora import ( LoRACompatibleConv, LoRACompatibleLinear, diff --git a/onediff_diffusers_extensions/onediffx/lora/utils.py b/onediff_diffusers_extensions/onediffx/lora/utils.py index 49fe2aca2..89b029d45 100644 --- a/onediff_diffusers_extensions/onediffx/lora/utils.py +++ b/onediff_diffusers_extensions/onediffx/lora/utils.py @@ -14,13 +14,13 @@ else: is_peft_available = lambda: False 
-from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_related_tensor if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection else: from diffusers.models.lora import PatchedLoraProjection -from onediff.infer_compiler.oneflow.dual_module import DualModule +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection diff --git a/onediff_diffusers_extensions/tests/profile_lora.py b/onediff_diffusers_extensions/tests/profile_lora.py index 1bf310aee..1ecdc3535 100644 --- a/onediff_diffusers_extensions/tests/profile_lora.py +++ b/onediff_diffusers_extensions/tests/profile_lora.py @@ -7,7 +7,7 @@ from diffusers import DiffusionPipeline from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora _time = None diff --git a/onediff_diffusers_extensions/tests/profile_multi_lora.py b/onediff_diffusers_extensions/tests/profile_multi_lora.py index 88b3d7cde..e50b6a750 100644 --- a/onediff_diffusers_extensions/tests/profile_multi_lora.py +++ b/onediff_diffusers_extensions/tests/profile_multi_lora.py @@ -8,7 +8,7 @@ from diffusers.utils.constants import USE_PEFT_BACKEND from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora, set_and_fuse_adapters if not USE_PEFT_BACKEND: diff --git a/onediff_sd_webui_extensions/api_examples/img2img.py b/onediff_sd_webui_extensions/api_examples/img2img.py index 4b5e6ee79..1512ba8b8 100644 --- a/onediff_sd_webui_extensions/api_examples/img2img.py +++ b/onediff_sd_webui_extensions/api_examples/img2img.py @@ -6,13 +6,13 @@ # And if you are using OneDiff Enterprise, add another # `"script_args" : [{"0": True}]` field to enable quantization -from datetime import datetime -from pathlib import Path -import urllib.request import base64 import json -import time import os +import time +import urllib.request +from datetime import datetime +from pathlib import Path webui_server_url = "http://127.0.0.1:7860" diff --git a/onediff_sd_webui_extensions/api_examples/txt2img.py b/onediff_sd_webui_extensions/api_examples/txt2img.py index 2ba72960e..9cb4b2be5 100644 --- a/onediff_sd_webui_extensions/api_examples/txt2img.py +++ b/onediff_sd_webui_extensions/api_examples/txt2img.py @@ -6,12 +6,12 @@ # And if you are using OneDiff Enterprise, add another # `"script_args" : [{"0": True}]` field to enable quantization -from datetime import datetime -import urllib.request import base64 import json -import time import os +import time +import urllib.request +from datetime import datetime webui_server_url = "http://127.0.0.1:7860" @@ -57,7 +57,6 @@ def call_txt2img_api(**payload): "width": 1024, "height": 1024, "cfg_scale": 7, - "sampler_name": "DPM++ 2M Karras", "n_iter": 1, "batch_size": 1, # Enable OneDiff speed up diff --git a/onediff_sd_webui_extensions/compile_ldm.py b/onediff_sd_webui_extensions/compile_ldm.py index e6a3aec06..e87f7f696 100644 --- a/onediff_sd_webui_extensions/compile_ldm.py +++ b/onediff_sd_webui_extensions/compile_ldm.py @@ -1,8 +1,6 @@ import os 
-import oneflow as flow -from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +import oneflow as flow from ldm.modules.attention import ( BasicTransformerBlock, CrossAttention, @@ -17,6 +15,9 @@ timestep_embedding, ) +from onediff.infer_compiler import oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register + __all__ = ["compile_ldm_unet"] diff --git a/onediff_sd_webui_extensions/compile_sgm.py b/onediff_sd_webui_extensions/compile_sgm.py index 12398a737..154b3dc5c 100644 --- a/onediff_sd_webui_extensions/compile_sgm.py +++ b/onediff_sd_webui_extensions/compile_sgm.py @@ -1,16 +1,20 @@ import oneflow as flow -from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register from sd_webui_onediff_utils import ( CrossAttentionOflow, GroupNorm32Oflow, timestep_embedding, ) -from sgm.modules.attention import CrossAttention, SpatialTransformer -from sgm.modules.diffusionmodules.openaimodel import UNetModel, ResBlock -from sgm.modules.attention import BasicTransformerBlock +from sgm.modules.attention import ( + BasicTransformerBlock, + CrossAttention, + SpatialTransformer, +) +from sgm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel from sgm.modules.diffusionmodules.util import GroupNorm32 +from onediff.infer_compiler import oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register + __all__ = ["compile_sgm_unet"] diff --git a/onediff_sd_webui_extensions/compile_vae.py b/onediff_sd_webui_extensions/compile_vae.py index d5c9c7f26..f3dd03204 100644 --- a/onediff_sd_webui_extensions/compile_vae.py +++ b/onediff_sd_webui_extensions/compile_vae.py @@ -1,22 +1,27 @@ from modules import shared -from modules.sd_vae_approx import model as get_vae_model, sd_vae_approx_models from modules.sd_vae_approx import VAEApprox +from modules.sd_vae_approx import model as get_vae_model +from modules.sd_vae_approx import sd_vae_approx_models + from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register __all__ = ["VaeCompileCtx"] compiled_models = {} + class VAEApproxOflow(proxy_class(VAEApprox)): pass + torch2oflow_class_map = { VAEApprox: VAEApproxOflow, } register(package_names=["modules"], torch2oflow_class_map=torch2oflow_class_map) + class VaeCompileCtx(object): def __init__(self, options=None): self._options = options diff --git a/onediff_sd_webui_extensions/onediff_hijack.py b/onediff_sd_webui_extensions/onediff_hijack.py index f2683ac42..c8da677c6 100644 --- a/onediff_sd_webui_extensions/onediff_hijack.py +++ b/onediff_sd_webui_extensions/onediff_hijack.py @@ -1,6 +1,7 @@ -import oneflow import compile_ldm import compile_sgm +import oneflow + # https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/1c0a0c4c26f78c32095ebc7f8af82f5c04fca8c0/modules/sd_hijack_unet.py#L8 class OneFlowHijackForUnet: @@ -8,12 +9,15 @@ class OneFlowHijackForUnet: This is oneflow, but with cat that resizes tensors to appropriate dimensions if they do not match; this makes it possible to create pictures with dimensions that are multiples of 8 rather than 64 """ + def __getattr__(self, item): - if item == 'cat': + if item == "cat": return self.cat if hasattr(oneflow, item): return getattr(oneflow, item) - raise AttributeError(f"'{type(self).__name__}' object has no 
attribute '{item}'") + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{item}'" + ) def cat(self, tensors, *args, **kwargs): if len(tensors) == 2: @@ -22,11 +26,13 @@ def cat(self, tensors, *args, **kwargs): tensors = (a, b) return oneflow.cat(tensors, *args, **kwargs) + hijack_flow = OneFlowHijackForUnet() + def unload_model_weights(sd_model=None, info=None): - from modules import lowvram, devices - from modules import shared + from modules import devices, lowvram, shared + m = sd_model or shared.sd_model if m.lowvram: lowvram.send_everything_to_cpu() @@ -35,10 +41,12 @@ def unload_model_weights(sd_model=None, info=None): devices.torch_gc() return sd_model + def send_model_to_cpu(m): # do nothing pass + def hijack_function(module, name, new_name, new_value): # restore original function in case of reload unhijack_function(module=module, name=name, new_name=new_name) @@ -51,35 +59,39 @@ def unhijack_function(module, name, new_name): setattr(module, name, getattr(module, new_name)) delattr(module, new_name) + def do_hijack(): compile_ldm.flow = hijack_flow compile_sgm.flow = hijack_flow - from modules import sd_models, script_callbacks + from modules import script_callbacks, sd_models + script_callbacks.on_script_unloaded(undo_hijack) hijack_function( module=sd_models, - name='unload_model_weights', - new_name='__onediff_original_unload_model_weights', + name="unload_model_weights", + new_name="__onediff_original_unload_model_weights", new_value=unload_model_weights, ) hijack_function( module=sd_models, - name='send_model_to_cpu', - new_name='__onediff_original_send_model_to_cpu', + name="send_model_to_cpu", + new_name="__onediff_original_send_model_to_cpu", new_value=send_model_to_cpu, ) + def undo_hijack(): compile_ldm.flow = oneflow compile_sgm.flow = oneflow from modules import sd_models + unhijack_function( module=sd_models, - name='unload_model_weights', - new_name='__onediff_original_unload_model_weights', + name="unload_model_weights", + new_name="__onediff_original_unload_model_weights", ) unhijack_function( module=sd_models, - name='send_model_to_cpu', - new_name='__onediff_original_send_model_to_cpu', + name="send_model_to_cpu", + new_name="__onediff_original_send_model_to_cpu", ) diff --git a/onediff_sd_webui_extensions/onediff_lora.py b/onediff_sd_webui_extensions/onediff_lora.py index 77066873f..0bee88e9d 100644 --- a/onediff_sd_webui_extensions/onediff_lora.py +++ b/onediff_sd_webui_extensions/onediff_lora.py @@ -1,6 +1,9 @@ import torch + from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import ( + update_graph_related_tensor, +) class HijackLoraActivate: diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py index 3c7e887cd..5e5766c04 100644 --- a/onediff_sd_webui_extensions/scripts/onediff.py +++ b/onediff_sd_webui_extensions/scripts/onediff.py @@ -1,37 +1,42 @@ import os -import zipfile import warnings -import gradio as gr +import zipfile from pathlib import Path -from typing import Union, Dict +from typing import Dict, Union + +import gradio as gr import modules.scripts as scripts import modules.shared as shared -from modules.sd_models import select_checkpoint -from modules.processing import process_images -from modules.ui_common import create_refresh_button -from modules import script_callbacks - -from ui_utils import hints_message, 
get_all_compiler_caches, refresh_all_compiler_caches, all_compiler_caches_path -from compile_ldm import compile_ldm_unet, SD21CompileCtx +from compile_ldm import SD21CompileCtx, compile_ldm_unet from compile_sgm import compile_sgm_unet from compile_vae import VaeCompileCtx -from onediff_lora import HijackLoraActivate +from modules import script_callbacks +from modules.processing import process_images +from modules.sd_models import select_checkpoint +from modules.ui_common import create_refresh_button from onediff_hijack import do_hijack as onediff_do_hijack +from onediff_lora import HijackLoraActivate +from oneflow import __version__ as oneflow_version +from ui_utils import ( + all_compiler_caches_path, + get_all_compiler_caches, + hints_message, + refresh_all_compiler_caches, +) -from onediff.infer_compiler.utils.log_utils import logger +from onediff import __version__ as onediff_version from onediff.optimization.quant_optimizer import ( quantize_model, varify_can_use_quantization, ) -from onediff.infer_compiler.utils.env_var import parse_boolean_from_env -from onediff import __version__ as onediff_version -from oneflow import __version__ as oneflow_version +from onediff.utils import logger, parse_boolean_from_env """oneflow_compiled UNetModel""" compiled_unet = None is_unet_quantized = False compiled_ckpt_name = None + def generate_graph_path(ckpt_name: str, model_name: str) -> str: base_output_dir = shared.opts.outdir_samples or shared.opts.outdir_txt2img_samples save_ckpt_graphs_path = os.path.join(base_output_dir, "graphs", ckpt_name) @@ -119,14 +124,29 @@ def ui(self, is_img2img): """ with gr.Row(): # TODO: set choices as Tuple[str, str] after the version of gradio specified webui upgrades - compiler_cache = gr.Dropdown(label="Compiler caches (Beta)", choices=["None"] + get_all_compiler_caches(), value="None", elem_id="onediff_compiler_cache") - refresh_button = create_refresh_button(compiler_cache, refresh_all_compiler_caches, lambda: {"choices": ["None"] + get_all_compiler_caches()}, "onediff_refresh_compiler_caches") + compiler_cache = gr.Dropdown( + label="Compiler caches (Beta)", + choices=["None"] + get_all_compiler_caches(), + value="None", + elem_id="onediff_compiler_cache", + ) + create_refresh_button( + compiler_cache, + refresh_all_compiler_caches, + lambda: {"choices": ["None"] + get_all_compiler_caches()}, + "onediff_refresh_compiler_caches", + ) save_cache_name = gr.Textbox(label="Saved cache name (Beta)") with gr.Row(): - always_recompile = gr.components.Checkbox(label="always_recompile", visible=parse_boolean_from_env("ONEDIFF_DEBUG")) - if not varify_can_use_quantization(): - gr.HTML(hints_message) - is_quantized = gr.components.Checkbox(label="Model Quantization(int8) Speed Up", visible=varify_can_use_quantization()) + always_recompile = gr.components.Checkbox( + label="always_recompile", + visible=parse_boolean_from_env("ONEDIFF_DEBUG"), + ) + gr.HTML(hints_message, elem_id="hintMessage", visible=not varify_can_use_quantization()) + is_quantized = gr.components.Checkbox( + label="Model Quantization(int8) Speed Up", + visible=varify_can_use_quantization(), + ) return [is_quantized, compiler_cache, save_cache_name, always_recompile] def show(self, is_img2img): @@ -143,7 +163,7 @@ def get_model_type(model): "is_ssd": model.is_ssd, } - if self.current_type == None: + if self.current_type is None: is_changed = True else: for key, v in self.current_type.items(): @@ -151,11 +171,18 @@ def get_model_type(model): is_changed = True break - if is_changed == True: + if 
is_changed: self.current_type = get_model_type(model) return is_changed - def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", always_recompile=False): + def run( + self, + p, + quantization=False, + compiler_cache=None, + saved_cache_name="", + always_recompile=False, + ): global compiled_unet, compiled_ckpt_name, is_unet_quantized current_checkpoint = shared.opts.sd_model_checkpoint @@ -165,9 +192,11 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a model_changed = self.check_model_change(shared.sd_model) quantization_changed = quantization != is_unet_quantized need_recompile = ( - (quantization and ckpt_changed) # always recompile when switching ckpt with 'int8 speed model' enabled - or model_changed # always recompile when switching model to another structure - or quantization_changed # always recompile when switching model from non-quantized to quantized (and vice versa) + ( + quantization and ckpt_changed + ) # always recompile when switching ckpt with 'int8 speed model' enabled + or model_changed # always recompile when switching model to another structure + or quantization_changed # always recompile when switching model from non-quantized to quantized (and vice versa) or always_recompile ) @@ -178,16 +207,23 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a original_diffusion_model, quantization=quantization ) - if compiler_cache != "None": + # The gradio version pinned by sd-webui makes the compiler cache dropdown always return a string + if compiler_cache not in [None, "None"]: compiler_cache_path = all_compiler_caches_path() + f"/{compiler_cache}" if not Path(compiler_cache_path).exists(): - raise FileNotFoundError(f"Cannot find cache {compiler_cache_path}, please make sure it exists") + raise FileNotFoundError( + f"Cannot find cache {compiler_cache_path}, please make sure it exists" + ) try: compiled_unet.load_graph(compiler_cache_path, run_warmup=True) - except zipfile.BadZipFile as e: - raise RuntimeError("Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui") + except zipfile.BadZipFile: + raise RuntimeError( + "Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui" + ) except Exception as e: - raise RuntimeError("Load cache failed. Please make sure cache has the same sd version (or unet architure) with current checkpoint") + raise RuntimeError( + f"Load cache failed ({e}). Please make sure the cache has the same sd version (or unet architecture) as the current checkpoint" + ) else: logger.info( @@ -199,8 +235,10 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a if saved_cache_name != "": if not os.access(str(all_compiler_caches_path()), os.W_OK): - raise PermissionError(f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. \ - Please change it in the settings to a directory with write permissions") + raise PermissionError( + f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. 
\ + Please change it in the settings to a directory with write permissions" + ) if not Path(all_compiler_caches_path()).exists(): Path(all_compiler_caches_path()).mkdir() saved_cache_name = all_compiler_caches_path() + f"/{saved_cache_name}" @@ -209,10 +247,18 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a return proc + def on_ui_settings(): - section = ('onediff', "OneDiff") - shared.opts.add_option("onediff_compiler_caches_path", shared.OptionInfo( - str(Path(__file__).parent.parent / "compiler_caches"), "Directory for onediff compiler caches", section=section)) + section = ("onediff", "OneDiff") + shared.opts.add_option( + "onediff_compiler_caches_path", + shared.OptionInfo( + str(Path(__file__).parent.parent / "compiler_caches"), + "Directory for onediff compiler caches", + section=section, + ), + ) + script_callbacks.on_ui_settings(on_ui_settings) onediff_do_hijack() diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py index 3b95b837c..19378d59c 100644 --- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py +++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py @@ -4,7 +4,10 @@ # *Only* converts the UNet, VAE, and Text Encoder. # Does not convert optimizer state or any other thing. -__all__ = ["convert_sd", "convert_unet_calibrate_info_sd"] +__all__ = [ + # "convert_sd", + "convert_unet_calibrate_info_sd", +] import argparse import os.path as osp @@ -14,7 +17,6 @@ import torch from safetensors.torch import load_file, save_file - # =================# # UNet Conversion # # =================# @@ -304,6 +306,7 @@ def convert_text_enc_state_dict_v20(text_enc_dict): def convert_text_enc_state_dict(text_enc_dict): return text_enc_dict + def convert_unet_calibrate_dict(state_dict) -> str: mapping = {k: k for k in state_dict} remove_suffix = ( @@ -345,14 +348,31 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): for name, info in dst_info.items(): f.write(f"{name} {info}\n") + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.") - parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.") - parser.add_argument("--half", action="store_true", help="Save weights in half precision.") parser.add_argument( - "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt." + "--model_path", + default=None, + type=str, + required=True, + help="Path to the model to convert.", + ) + parser.add_argument( + "--checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the output model.", + ) + parser.add_argument( + "--half", action="store_true", help="Save weights in half precision." + ) + parser.add_argument( + "--use_safetensors", + action="store_true", + help="Save weights use safetensors, default is ckpt.", ) args = parser.parse_args() @@ -387,7 +407,9 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # Convert the UNet model unet_state_dict = convert_unet_state_dict(unet_state_dict) - unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + unet_state_dict = { + "model.diffusion_model." 
+ k: v for k, v in unet_state_dict.items() + } # Convert the VAE model vae_state_dict = convert_vae_state_dict(vae_state_dict) @@ -400,10 +422,14 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()} text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict) - text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "cond_stage_model.model." + k: v for k, v in text_enc_dict.items() + } else: text_enc_dict = convert_text_enc_state_dict(text_enc_dict) - text_enc_dict = {"cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items() + } # Put together new checkpoint state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict} @@ -416,8 +442,13 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): state_dict = {"state_dict": state_dict} torch.save(state_dict, args.checkpoint_path) - calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" - convert_unet_calibrate_info_sd(args.model_path + "/calibrate_info.txt", calibrate_info_save_path) + calibrate_info_save_path = ( + Path(args.checkpoint_path).parent + / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" + ) + convert_unet_calibrate_info_sd( + args.model_path + "/calibrate_info.txt", calibrate_info_save_path + ) # def get_unet_state_dict(model_path): # unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") @@ -490,4 +521,3 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # else: # state_dict = {"state_dict": state_dict} # torch.save(state_dict, checkpoint_path) - diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py index 7284c9293..e97c5849f 100644 --- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py +++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py @@ -4,7 +4,10 @@ # *Only* converts the UNet, VAE, and Text Encoder. # Does not convert optimizer state or any other thing. -__all__ = ["convert_sdxl", "convert_unet_calibrate_info_sdxl"] +__all__ = [ + # "convert_sdxl", + "convert_unet_calibrate_info_sdxl", +] import argparse import os.path as osp @@ -14,7 +17,6 @@ import torch from safetensors.torch import load_file, save_file - # =================# # UNet Conversion # # =================# @@ -285,6 +287,7 @@ def convert_openclip_text_enc_state_dict(text_enc_dict): def convert_openai_text_enc_state_dict(text_enc_dict): return text_enc_dict + def convert_unet_calibrate_dict(state_dict) -> str: mapping = {k: k for k in state_dict} remove_suffix = ( @@ -333,11 +336,27 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.") - parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.") - parser.add_argument("--half", action="store_true", help="Save weights in half precision.") parser.add_argument( - "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt." 
+ "--model_path", + default=None, + type=str, + required=True, + help="Path to the model to convert.", + ) + parser.add_argument( + "--checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the output model.", + ) + parser.add_argument( + "--half", action="store_true", help="Save weights in half precision." + ) + parser.add_argument( + "--use_safetensors", + action="store_true", + help="Save weights use safetensors, default is ckpt.", ) args = parser.parse_args() @@ -374,12 +393,16 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): if osp.exists(text_enc_2_path): text_enc_2_dict = load_file(text_enc_2_path, device="cpu") else: - text_enc_2_path = osp.join(args.model_path, "text_encoder_2", "pytorch_model.bin") + text_enc_2_path = osp.join( + args.model_path, "text_encoder_2", "pytorch_model.bin" + ) text_enc_2_dict = torch.load(text_enc_2_path, map_location="cpu") # Convert the UNet model unet_state_dict = convert_unet_state_dict(unet_state_dict) - unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + unet_state_dict = { + "model.diffusion_model." + k: v for k, v in unet_state_dict.items() + } # Convert the VAE model vae_state_dict = convert_vae_state_dict(vae_state_dict) @@ -387,19 +410,30 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): # Convert text encoder 1 text_enc_dict = convert_openai_text_enc_state_dict(text_enc_dict) - text_enc_dict = {"conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items() + } # Convert text encoder 2 text_enc_2_dict = convert_openclip_text_enc_state_dict(text_enc_2_dict) - text_enc_2_dict = {"conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()} + text_enc_2_dict = { + "conditioner.embedders.1.model." 
+ k: v for k, v in text_enc_2_dict.items() + } # We call the `.T.contiguous()` to match what's done in # https://github.com/huggingface/diffusers/blob/84905ca7287876b925b6bf8e9bb92fec21c78764/src/diffusers/loaders/single_file_utils.py#L1085 - text_enc_2_dict["conditioner.embedders.1.model.text_projection"] = text_enc_2_dict.pop( + text_enc_2_dict[ + "conditioner.embedders.1.model.text_projection" + ] = text_enc_2_dict.pop( "conditioner.embedders.1.model.text_projection.weight" ).T.contiguous() # Put together new checkpoint - state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict, **text_enc_2_dict} + state_dict = { + **unet_state_dict, + **vae_state_dict, + **text_enc_dict, + **text_enc_2_dict, + } if args.half: state_dict = {k: v.half() for k, v in state_dict.items()} @@ -410,8 +444,13 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): state_dict = {"state_dict": state_dict} torch.save(state_dict, args.checkpoint_path) - calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" - convert_unet_calibrate_info_sdxl(args.model_path + "/calibrate_info.txt", calibrate_info_save_path) + calibrate_info_save_path = ( + Path(args.checkpoint_path).parent + / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" + ) + convert_unet_calibrate_info_sdxl( + args.model_path + "/calibrate_info.txt", calibrate_info_save_path + ) # def get_unet_state_dict(model_path): @@ -497,4 +536,3 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): # else: # state_dict = {"state_dict": state_dict} # torch.save(state_dict, checkpoint_path) - diff --git a/onediff_sd_webui_extensions/ui_utils.py b/onediff_sd_webui_extensions/ui_utils.py index 7feea4eaa..7e442be4a 100644 --- a/onediff_sd_webui_extensions/ui_utils.py +++ b/onediff_sd_webui_extensions/ui_utils.py @@ -1,42 +1,48 @@ from pathlib import Path +from textwrap import dedent -hints_message = """ -
-  [HTML markup of the hint banner was lost during text extraction; its visible text:]
-  Hints Message
-  Hints: Enterprise function is not supported on your system.
-  If you need Enterprise Level Support for your system or business, please send an email to
-  business@siliconflow.com.
-  Tell us about your use case, deployment scale, and requirements.
-  GitHub Issue:
-  https://github.com/siliconflow/onediff/issues
-  [closing markup lost during extraction]
-"""
+hints_message = dedent("""\
+  [same HTML banner, re-indented for textwrap.dedent; markup lost during extraction, visible text unchanged:]
+  Hints Message
+  Hints: Enterprise function is not supported on your system.
+  If you need Enterprise Level Support for your system or business, please send an email to
+  business@siliconflow.com.
+  Tell us about your use case, deployment scale, and requirements.
+  GitHub Issue:
+  https://github.com/siliconflow/onediff/issues
+  [closing markup lost during extraction]
+""") all_compiler_caches = [] + def all_compiler_caches_path(): import modules.shared as shared + caches_path = Path(shared.opts.onediff_compiler_caches_path) if not caches_path.exists(): caches_path.mkdir(parents=True) return shared.opts.onediff_compiler_caches_path + def get_all_compiler_caches(): global all_compiler_caches if len(all_compiler_caches) == 0: refresh_all_compiler_caches() return all_compiler_caches + def refresh_all_compiler_caches(path: Path = None): global all_compiler_caches path = path or all_compiler_caches_path() - all_compiler_caches = [f.stem for f in Path(path).iterdir() if f.is_file()] \ No newline at end of file + all_compiler_caches = [f.stem for f in Path(path).iterdir() if f.is_file()] diff --git a/src/infer_compiler_registry/register_diffusers/__init__.py b/src/infer_compiler_registry/register_diffusers/__init__.py index 98f15954a..0709a45ce 100644 --- a/src/infer_compiler_registry/register_diffusers/__init__.py +++ b/src/infer_compiler_registry/register_diffusers/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from packaging import version import importlib.metadata @@ -15,11 +15,19 @@ if diffusers_version < version.parse("0.26.00"): from diffusers.models.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformer_2d import Transformer2DModel else: from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unets.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unets.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformers.transformer_2d import Transformer2DModel if diffusers_version >= version.parse("0.25.00"): @@ -34,7 +42,9 @@ from diffusers.models.unets.unet_spatio_temporal_condition import ( UNetSpatioTemporalConditionModel, ) - from diffusers.models.transformers.transformer_temporal import TransformerSpatioTemporalModel + from diffusers.models.transformers.transformer_temporal import ( + TransformerSpatioTemporalModel, + ) else: from diffusers.models.transformer_temporal import TransformerSpatioTemporalModel from diffusers.models.unet_spatio_temporal_condition import ( @@ -47,8 +57,10 @@ ) else: from diffusers.models.autoencoder_kl_temporal_decoder import TemporalDecoder - - from .spatio_temporal_oflow import SpatioTemporalResBlock as SpatioTemporalResBlockOflow + + from .spatio_temporal_oflow import ( + SpatioTemporalResBlock as SpatioTemporalResBlockOflow, + ) from .spatio_temporal_oflow import TemporalDecoder as TemporalDecoderOflow from .spatio_temporal_oflow import ( TransformerSpatioTemporalModel as TransformerSpatioTemporalModelOflow, diff --git a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py index a8b0e571e..8bf0a367e 100644 --- a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py @@ -22,7 +22,7 @@ import diffusers from diffusers.utils import deprecate, logging -from onediff.infer_compiler.utils import parse_boolean_from_env, set_boolean_env_var +from onediff.utils import 
parse_boolean_from_env, set_boolean_env_var def is_xformers_available():
diff --git a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py index 5e33c9970..3133cabab 100644 --- a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers")
diff --git a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py index 12dbb49d2..fd4aacb54 100644 --- a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py @@ -30,7 +30,7 @@ if diffusers_version >= diffusers_0240_v: - from onediff.infer_compiler.transform import transform_mgr + from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers")
diff --git a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py index f343356b8..e07371411 100644 --- a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py @@ -6,7 +6,7 @@ import oneflow as torch import oneflow.nn.functional as F from oneflow import nn -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers") @@ -968,7 +968,9 @@ def forward( if diffusers_version >= diffusers_0270_v: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning( + "Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored." + ) # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. 
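The hunks above all make the same mechanical move: helpers that previously lived under `onediff.infer_compiler.transform` and `onediff.infer_compiler.utils` now resolve through `onediff.infer_compiler.backends.oneflow.*` and `onediff.utils`. A minimal sketch of what this migration means for downstream code is below; it uses only names that appear in this diff, assumes onediff is installed with its oneflow backend, and the `pipe` object is hypothetical.

```python
# Old import layout, removed throughout this diff:
#   from onediff.infer_compiler.transform import register, proxy_class, transform_mgr
#   from onediff.infer_compiler.utils.log_utils import logger
#   from onediff.infer_compiler.utils import parse_boolean_from_env
#   from onediff.infer_compiler import compile_options  # module-level global

# New import layout introduced by this diff:
from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
from onediff.utils import logger, parse_boolean_from_env

# The removed global `compile_options` is replaced by a per-call options object;
# `use_graph` and `max_cached_graph_size` are fields of the OneflowCompileOptions
# dataclass shown later in this diff (env_var.py).
options = OneflowCompileOptions()
options.use_graph = True
options.max_cached_graph_size = 9

# compiled_unet = oneflow_compile(pipe.unet, options=options)  # `pipe` is hypothetical
logger.info("oneflow backend import paths resolved")
```

This matches the switch the example scripts above make from the removed module-level `compile_options` to constructing an `OneflowCompileOptions` instance per compile call.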
diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py index 86234c5c7..54ae20ae3 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) @@ -70,7 +70,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, @@ -236,7 +238,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py index c36303415..d87724d07 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) diff --git a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py index d8bf735f9..fb2028b40 100644 --- a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py +++ b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import diffusers_enterprise_lite diff --git a/src/infer_compiler_registry/register_onediff_quant/__init__.py b/src/infer_compiler_registry/register_onediff_quant/__init__.py index e9ab3afd8..dd5a37a26 100644 --- a/src/infer_compiler_registry/register_onediff_quant/__init__.py +++ b/src/infer_compiler_registry/register_onediff_quant/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import onediff_quant diff --git a/src/onediff/infer_compiler/__init__.py b/src/onediff/infer_compiler/__init__.py index bff98d894..7110e897e 100644 --- a/src/onediff/infer_compiler/__init__.py +++ b/src/onediff/infer_compiler/__init__.py @@ -1,10 +1,4 @@ import os import torch -from .core import * -from .utils import set_default_env_vars -from .utils.options import CompileOptions -from 
.utils.options import _GLOBAL_compile_options as compile_options - - -set_default_env_vars() +from .backends import *
diff --git a/src/onediff/infer_compiler/backends/__init__.py b/src/onediff/infer_compiler/backends/__init__.py index e69de29bb..bbaef1412 100644 --- a/src/onediff/infer_compiler/backends/__init__.py +++ b/src/onediff/infer_compiler/backends/__init__.py @@ -0,0 +1,4 @@ +from .deployable_module import DeployableModule +from .compiler import compile +from .compiler import oneflow_compile +from .oneflow import OneflowCompileOptions
diff --git a/src/onediff/infer_compiler/core/with_onediff_compile.py b/src/onediff/infer_compiler/backends/compiler.py similarity index 90% rename from src/onediff/infer_compiler/core/with_onediff_compile.py rename to src/onediff/infer_compiler/backends/compiler.py index 3ab038162..4bf91bb83 100644 --- a/src/onediff/infer_compiler/core/with_onediff_compile.py +++ b/src/onediff/infer_compiler/backends/compiler.py @@ -1,12 +1,14 @@ import torch + from .deployable_module import DeployableModule _DEFAULT_BACKEND = "oneflow" + def compile( torch_module: torch.nn.Module, *, backend=_DEFAULT_BACKEND, options=None ) -> DeployableModule: - from ..backends.registry import lookup_backend + from .registry import lookup_backend backend = lookup_backend(backend) model = backend(torch_module, options=options)
diff --git a/src/onediff/infer_compiler/core/deployable_module.py b/src/onediff/infer_compiler/backends/deployable_module.py similarity index 100% rename from src/onediff/infer_compiler/core/deployable_module.py rename to src/onediff/infer_compiler/backends/deployable_module.py
diff --git a/src/onediff/infer_compiler/backends/nexfort.py b/src/onediff/infer_compiler/backends/nexfort.py deleted file mode 100644 index 67cca8cbc..000000000 --- a/src/onediff/infer_compiler/backends/nexfort.py +++ /dev/null @@ -1,32 +0,0 @@ -import dataclasses -import torch -from .registry import register_backend - - -def make_inductor_options(options): - inductor_options = {} - if options is None: - return inductor_options - for filed in dataclasses.fields(options): - filed_name = filed.name - inductor_options[f"inductor.{filed_name}"] = getattr(options, filed_name) - return inductor_options - - -@register_backend("nexfort") -def compile(torch_module: torch.nn.Module, *, options=None): - from nexfort.utils.memory_format import apply_memory_format - from nexfort.compilers import nexfort_compile - from ..nexfort.deployable_module import NexfortDeployableModule - from ..utils import CompileOptions - - options = options if options is not None else CompileOptions() - nexfort_options = options.nexfort - if nexfort_options.memory_format != torch.preserve_format: - model = apply_memory_format( - torch_module, memory_format=nexfort_options.memory_format - ) - model = nexfort_compile( - model, options=make_inductor_options(nexfort_options.inductor) - ) - return NexfortDeployableModule(model)
diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md new file mode 100644 index 000000000..a36f38d5c --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -0,0 +1,33 @@ +## nexfort backend for the onediff compiler +### Dependency +``` +pip3 install --pre -U torch==2.4.0.dev20240507 torchaudio==2.2.0.dev20240507+cu124 torchvision==0.19.0.dev20240507+cu124 --index-url https://download.pytorch.org/whl/nightly/cu124 +pip3 install -U torchao==0.1 +``` + +### Install nexfort + +Before installing nexfort, please make 
sure that the corresponding PyTorch and CUDA environments are installed. + +``` +# PyTorch 2.3.0, CUDA 12.1 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch230cu121-cp310-cp310-manylinux2014_x86_64.whl + +# PyTorch 2.4.0, CUDA 12.1 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch240dev20240507cu121-cp310-cp310-manylinux2014_x86_64.whl + +# PyTorch 2.4.0, CUDA 12.4 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch240dev20240507cu124-cp310-cp310-manylinux2014_x86_64.whl +``` + +### Run pixart alpha (with nexfort backend) + +``` +# model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ +python3 ./benchmarks/text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --steps 20 --compiler nexfort --output-image ./pixart_alpha_nex.png +``` +Performance on NVIDIA A100-PCIE-40GB: +- Warmup time: 771.418s +- Inference time: 2.045s +- Iterations per second: 10.743 +- Max used CUDA memory: 13.855GiB diff --git a/src/onediff/infer_compiler/backends/nexfort/__init__.py b/src/onediff/infer_compiler/backends/nexfort/__init__.py new file mode 100644 index 000000000..1ea5f954e --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/__init__.py @@ -0,0 +1 @@ +from . import nexfort as _nexfort_backend diff --git a/src/onediff/infer_compiler/nexfort/deployable_module.py b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py similarity index 58% rename from src/onediff/infer_compiler/nexfort/deployable_module.py rename to src/onediff/infer_compiler/backends/nexfort/deployable_module.py index eb8a91be2..a9e94977e 100644 --- a/src/onediff/infer_compiler/nexfort/deployable_module.py +++ b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py @@ -1,12 +1,13 @@ import torch -from ..core.deployable_module import DeployableModule + +from ..deployable_module import DeployableModule class NexfortDeployableModule(DeployableModule): - def __init__(self, torch_module): + def __init__(self, compiled_module, torch_module): torch.nn.Module.__init__(self) - object.__setattr__(self, "_deployable_module_model", torch_module) - object.__setattr__(self, "_modules", torch_module._modules) + object.__setattr__(self, "_deployable_module_model", compiled_module) + object.__setattr__(self, "_modules", compiled_module._modules) object.__setattr__(self, "_torch_module", torch_module) def __call__(self, *args, **kwargs): diff --git a/src/onediff/infer_compiler/backends/nexfort/nexfort.py b/src/onediff/infer_compiler/backends/nexfort/nexfort.py new file mode 100644 index 000000000..d7f2fa69d --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/nexfort.py @@ -0,0 +1,17 @@ +import dataclasses +import torch +from ..registry import register_backend + + +@register_backend("nexfort") +def compile(torch_module: torch.nn.Module, *, options=None): + from nexfort.compilers import nexfort_compile + if isinstance(options, str): + import json + + # TODO(): using jsonschema to define the options schema + options = json.loads(options) + + nexfort_options = options if options is not None else dict() + compiled_model = nexfort_compile(torch_module, **nexfort_options) + return compiled_model diff --git a/src/onediff/infer_compiler/backends/oneflow/__init__.py b/src/onediff/infer_compiler/backends/oneflow/__init__.py new file mode 100644 index 000000000..69c5c9b11 --- /dev/null +++ 
b/src/onediff/infer_compiler/backends/oneflow/__init__.py @@ -0,0 +1,3 @@ +from . import oneflow as _oneflow_backend +from .deployable_module import OneflowDeployableModule +from .env_var import OneflowCompileOptions diff --git a/src/onediff/infer_compiler/utils/args_tree_util.py b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py similarity index 98% rename from src/onediff/infer_compiler/utils/args_tree_util.py rename to src/onediff/infer_compiler/backends/oneflow/args_tree_util.py index 598b95828..fb253e800 100644 --- a/src/onediff/infer_compiler/utils/args_tree_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py @@ -1,7 +1,7 @@ import torch import oneflow as flow from oneflow.framework.args_tree import ArgsTree -from .log_utils import logger +from onediff.utils import logger def input_output_processor(func): diff --git a/src/onediff/infer_compiler/oneflow/deployable_module.py b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py similarity index 72% rename from src/onediff/infer_compiler/oneflow/deployable_module.py rename to src/onediff/infer_compiler/backends/oneflow/deployable_module.py index 71db38870..dd73dd111 100644 --- a/src/onediff/infer_compiler/oneflow/deployable_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py @@ -1,18 +1,55 @@ import types import torch +from functools import wraps + import oneflow as flow -from ..core.deployable_module import DeployableModule -from ..transform.manager import transform_mgr -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.args_tree_util import input_output_processor -from ..utils.log_utils import logger -from ..utils.param_utils import parse_device, check_device, generate_constant_folding_info -from ..utils.graph_management_utils import graph_file_management -from ..utils.online_quantization_utils import quantize_and_deploy_wrapper -from ..utils.options import OneflowCompileOptions +from onediff.utils import logger + +from ..deployable_module import DeployableModule + +from .transform.manager import transform_mgr +from .transform.builtin_transform import torch2oflow + +from .dual_module import DualModule, get_mixed_dual_module +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled +from .args_tree_util import input_output_processor +from .param_utils import parse_device, check_device, generate_constant_folding_info +from .graph_management_utils import graph_file_management +from .online_quantization_utils import quantize_and_deploy_wrapper +from .env_var import OneflowCompileOptions + + +@torch2oflow.register +def _(mod: DualModule, verbose=False): + return torch2oflow(mod._torch_module, verbose) + + +def handle_deployable_exception(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if transform_mgr.debug_mode: + return func(self, *args, **kwargs) + else: + try: + return func(self, *args, **kwargs) + except Exception as e: + logger.error(f"Exception in {func.__name__}: {e=}") + logger.warning("Recompile oneflow module ...") + del self._deployable_module_model.oneflow_module + self._deployable_module_dpl_graph = None + return func(self, *args, **kwargs) + + return wrapper + + +def get_oneflow_graph(model, size=9, dynamic_graph=True): + from .graph import OneflowGraph -from .utils import handle_deployable_exception, get_mixed_dual_module, get_oneflow_graph + g = OneflowGraph(model) + g._dynamic_input_graph_cache.set_cache_size(size) + 
g._dynamic_input_graph_cache.enable_shared(dynamic_graph) + return g class OneflowDeployableModule(DeployableModule): @@ -199,3 +236,29 @@ def apply_online_quant(self, quant_config): >>> model.apply_online_quant(quant_config) """ self._deployable_module_quant_config = quant_config + + +def get_mixed_deployable_module(module_cls): + class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): + def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): + OneflowDeployableModule.__init__( + self, torch_module, oneflow_module, dynamic, options + ) + self._is_raw_deployable_module = False + + @classmethod + def from_existing(cls, existing_module, dynamic=True, options=None): + torch_module = existing_module._deployable_module_model._torch_module + oneflow_module = existing_module._deployable_module_model._oneflow_module + instance = cls(torch_module, oneflow_module, dynamic, options) + instance._deployable_module_dpl_graph = None + if hasattr(existing_module, "_deployable_module_dpl_graph"): + instance._deployable_module_dpl_graph = ( + existing_module._deployable_module_dpl_graph + ) + return instance + + def _get_name(self): + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedOneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py similarity index 85% rename from src/onediff/infer_compiler/oneflow/dual_module.py rename to src/onediff/infer_compiler/backends/oneflow/dual_module.py index 11a59ca18..903d814c7 100644 --- a/src/onediff/infer_compiler/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -7,9 +7,9 @@ import oneflow as flow from oneflow.utils.tensor import to_torch -from ..transform.builtin_transform import torch2oflow -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.log_utils import logger +from onediff.utils import logger +from .transform.builtin_transform import torch2oflow +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled class DualModule(torch.nn.Module): @@ -29,6 +29,7 @@ def oneflow_module(self): logger.debug(f"Convert {type(self._torch_module)} ...") self._oneflow_module = torch2oflow(self._torch_module) logger.debug(f"Convert {type(self._torch_module)} done!") + return self._oneflow_module @oneflow_module.deleter @@ -91,8 +92,6 @@ def __getattr__(self, name): return DualModuleList(torch_attr, oneflow_attr) elif isinstance(torch_attr, torch.nn.Module): - from .utils import get_mixed_dual_module - return get_mixed_dual_module(torch_attr.__class__)(torch_attr, oneflow_attr) else: return oneflow_attr if oneflow_exec_mode_enabled() else torch_attr @@ -101,6 +100,13 @@ def __setattr__(self, name: str, value: Any) -> None: if name in ["_torch_module", "_oneflow_module"]: super().__setattr__(name, value) else: # TODO: aviod memory up when set attr + _torch_module: torch.nn.Module = self._torch_module + if ( + hasattr(_torch_module, "_disable_param_update") + and _torch_module._disable_param_update + ): + return + if self._oneflow_module is not None: v = torch2oflow(value) if isinstance(v, flow.Tensor): @@ -108,7 +114,7 @@ def __setattr__(self, name: str, value: Any) -> None: obj.copy_(v) else: setattr(self._oneflow_module, name, v) - setattr(self._torch_module, name, value) + setattr(_torch_module, name, value) def extra_repr(self) -> str: return self._torch_module.extra_repr() @@ -120,7 +126,6 @@ def __init__(self, 
torch_modules, oneflow_modules): assert len(torch_modules) == len(oneflow_modules) self._torch_modules = torch_modules self._oneflow_modules = oneflow_modules - from .utils import get_mixed_dual_module dual_modules = [] for torch_module, oneflow_module in zip( @@ -152,3 +157,19 @@ def __setattr__(self, key, value): value = torch2oflow(value) setattr(self._oneflow_modules, key, value) return object.__setattr__(self, key, value) + + +def get_mixed_dual_module(module_cls): + if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: + return module_cls + + class MixedDualModule(DualModule, module_cls): + def __init__(self, torch_module, oneflow_module): + while isinstance(torch_module, DualModule): + torch_module = torch_module._torch_module + DualModule.__init__(self, torch_module, oneflow_module) + + def _get_name(self) -> str: + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedDualModule diff --git a/src/onediff/infer_compiler/utils/env_var.py b/src/onediff/infer_compiler/backends/oneflow/env_var.py similarity index 76% rename from src/onediff/infer_compiler/utils/env_var.py rename to src/onediff/infer_compiler/backends/oneflow/env_var.py index ce58d8f93..68cdef570 100644 --- a/src/onediff/infer_compiler/utils/env_var.py +++ b/src/onediff/infer_compiler/backends/oneflow/env_var.py @@ -1,35 +1,50 @@ import dataclasses import os +import torch from typing import Optional - -def parse_boolean_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - env_var = env_var.lower() - return env_var in ("1", "true", "yes", "on", "y") - - -def set_boolean_env_var(env_var: str, val: Optional[bool]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = "1" if val else "0" - - -def parse_integer_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - return int(env_var) - - -def set_integer_env_var(env_var: str, val: Optional[int]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = str(int(val)) +from onediff.utils import set_boolean_env_var, set_integer_env_var + + +@dataclasses.dataclass +class OneflowCompileOptions: + dynamic: bool = True + use_graph: bool = True + debug_level: int = -1 + max_cached_graph_size: int = 9 + graph_file: str = None + graph_file_device: torch.device = None + + # Optimization related environment variables + run_graph_by_vm: bool = None + graph_delay_variable_op_execution: bool = None + + conv_allow_half_precision_accumulation: bool = None + matmul_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_score_accumulation_max_m: int = None + attention_allow_quantization: bool = None + + mlir_cse: bool = None + mlir_enable_inference_optimization: bool = None + mlir_enable_round_trip: bool = None + mlir_fuse_forward_ops: bool = None + mlir_fuse_ops_with_backward_impl: bool = None + mlir_group_matmul: bool = None + mlir_prefer_nhwc: bool = None + mlir_fuse_kernel_launch: bool = None + + kernel_enable_cuda_graph: bool = None + kernel_enable_fused_conv_bias: bool = None + kernel_enable_fused_linear: bool = None + kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None + kernel_enable_conv2d_tuning_warmup: bool = None + kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None + kernel_conv_enable_cutlass_impl: bool = None + kernel_gemm_enable_cutlass_impl: bool = 
None + kernel_glu_enable_dual_gemm_impl: bool = None + kernel_glu_enable_y_gemm_impl: bool = None + kernel_glu_quant_enable_dual_gemm_impl: bool = None def _set_env_vars(field2env_var, options): @@ -117,17 +132,3 @@ def set_oneflow_default_env_vars(): # TODO: enable this will cause the failure of multi resolution warmup # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -def set_nexfort_env_vars(options): - field2env_var = {} - _set_env_vars(field2env_var, options) - - -def set_nexfort_default_env_vars(): - pass - - -def set_default_env_vars(): - set_oneflow_default_env_vars() - set_nexfort_default_env_vars() diff --git a/src/onediff/infer_compiler/oneflow/graph.py b/src/onediff/infer_compiler/backends/oneflow/graph.py similarity index 92% rename from src/onediff/infer_compiler/oneflow/graph.py rename to src/onediff/infer_compiler/backends/oneflow/graph.py index 34aef1663..301270832 100644 --- a/src/onediff/infer_compiler/oneflow/graph.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph.py @@ -1,9 +1,9 @@ import oneflow as flow -from ..transform.manager import transform_mgr -from ..transform.builtin_transform import reverse_proxy_class -from ..utils.log_utils import logger -from ..utils.cost_util import cost_cnt +from onediff.utils import logger +from .transform.manager import transform_mgr +from .transform.builtin_transform import reverse_proxy_class +from .utils.cost_util import cost_cnt class OneflowGraph(flow.nn.Graph): diff --git a/src/onediff/infer_compiler/utils/graph_management_utils.py b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py similarity index 92% rename from src/onediff/infer_compiler/utils/graph_management_utils.py rename to src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py index f60afbc7e..534fe69c4 100644 --- a/src/onediff/infer_compiler/utils/graph_management_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py @@ -7,11 +7,11 @@ from pathlib import Path from functools import wraps from oneflow.framework.args_tree import ArgsTree -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from .log_utils import logger -from .cost_util import cost_time -from .options import OneflowCompileOptions +from .transform.builtin_transform import torch2oflow +from .transform.manager import transform_mgr +from .utils.cost_util import cost_time +from .env_var import OneflowCompileOptions +from onediff.utils import logger def calculate_model_hash(model): @@ -57,9 +57,12 @@ def wrapper(self, *args, **kwargs): # Avoid graph file conflicts if importlib.util.find_spec("register_comfy"): from register_comfy import CrossAttntionStateDictPatch as state_patch + attn2_patch_sum = state_patch.attn2_patch_sum(input_kwargs=kwargs) if attn2_patch_sum > 0: - graph_file = graph_file.replace(".graph", f"_attn2_{attn2_patch_sum}.graph") + graph_file = graph_file.replace( + ".graph", f"_attn2_{attn2_patch_sum}.graph" + ) def process_state_dict_before_saving(state_dict: Dict): nonlocal self, args, kwargs, graph_file @@ -98,7 +101,7 @@ def handle_graph_saving(): parent_dir = os.path.dirname(graph_file) if parent_dir != "": os.makedirs(parent_dir, exist_ok=True) - + # Avoid graph file conflicts if os.path.exists(graph_file): raise FileExistsError(f"File {graph_file} exists!") diff --git a/src/onediff/infer_compiler/import_tools/__init__.py 
b/src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py diff --git a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py similarity index 98% rename from src/onediff/infer_compiler/import_tools/dyn_mock_mod.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py index 4cb5fa6fc..8ac3ae0c9 100644 --- a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py @@ -10,8 +10,8 @@ from oneflow.mock_torch import enable from oneflow.mock_torch.mock_importer import _importer from .import_module_utils import import_module_from_path -from ..utils.log_utils import logger -from ..utils.patch_for_compiler import * +from onediff.utils import logger +from .patch_for_compiler import * __all__ = ["DynamicMockModule"] diff --git a/src/onediff/infer_compiler/import_tools/format_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/format_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py diff --git a/src/onediff/infer_compiler/import_tools/import_module_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/import_module_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py diff --git a/src/onediff/infer_compiler/import_tools/importer.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py similarity index 99% rename from src/onediff/infer_compiler/import_tools/importer.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py index 0ac9ac4ba..854a7577b 100644 --- a/src/onediff/infer_compiler/import_tools/importer.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py @@ -9,7 +9,7 @@ from importlib.metadata import requires from .format_utils import MockEntityNameFormatter from .dyn_mock_mod import DynamicMockModule -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["LazyMocker", "is_need_mock"] diff --git a/src/onediff/infer_compiler/utils/patch_for_compiler.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py similarity index 97% rename from src/onediff/infer_compiler/utils/patch_for_compiler.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py index 501411d2c..8e7f7e40b 100644 --- a/src/onediff/infer_compiler/utils/patch_for_compiler.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py @@ -109,13 +109,17 @@ def scaled_dot_product_attention( from oneflow import Tensor + def oneflow_rfloordiv(): - original_rfloordiv = Tensor.__rfloordiv__ + original_rfloordiv = Tensor.__rfloordiv__ + def rfloordiv(self, other): if isinstance(other, int): other = flow.tensor(other) - + return original_rfloordiv(self, other) + return rfloordiv -Tensor.__rfloordiv__ = oneflow_rfloordiv() \ No newline at end of file + +Tensor.__rfloordiv__ = oneflow_rfloordiv() diff --git a/src/onediff/infer_compiler/backends/oneflow.py b/src/onediff/infer_compiler/backends/oneflow/oneflow.py 
similarity index 79% rename from src/onediff/infer_compiler/backends/oneflow.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow.py index 71b010950..c534a3c08 100644 --- a/src/onediff/infer_compiler/backends/oneflow.py +++ b/src/onediff/infer_compiler/backends/oneflow/oneflow.py @@ -1,5 +1,6 @@ import torch -from .registry import register_backend + +from ..registry import register_backend @register_backend("oneflow") @@ -8,7 +9,7 @@ def compile(torch_module: torch.nn.Module, *, options=None): Transform a torch nn.Module to oneflow.nn.Module, then optimize it with oneflow.nn.Graph. Args: model (torch.nn.Module): Module to optimize - options (CompileOptions): Compilation options to pass to the compiler: + options (OneflowCompileOptions): Compilation options to pass to the compiler: - 'dynamic': When this is True, we will generate one graph and reuse it to avoid recompilations when input shape change. This may not always work as some operations/optimizations break the contition of reusing. When this is False, we will generate a graph for each new input shape, and will always specialize. @@ -19,31 +20,33 @@ def compile(torch_module: torch.nn.Module, *, options=None): - 'graph_file' (None) generates a compilation cache file. If the file exists, loading occurs; if not, the compilation result is saved after the first run. - 'graph_file_device' (None) sets the device for the graph file, default None. If set, the compilation result will be converted to the specified device. """ - from ..oneflow.deployable_module import OneflowDeployableModule - from ..oneflow.utils import get_mixed_deployable_module - from ..transform.custom_transform import set_default_registry - from ..utils import CompileOptions, set_oneflow_env_vars - from ..utils.param_utils import ( + from .deployable_module import OneflowDeployableModule, get_mixed_deployable_module + from .env_var import ( + set_oneflow_default_env_vars, + set_oneflow_env_vars, + OneflowCompileOptions, + ) + from .param_utils import ( state_update_hook, init_state_update_attr, forward_pre_check_and_update_state_hook, forward_generate_constant_folding_info_hook, ) + from .transform.custom_transform import set_default_registry + set_oneflow_default_env_vars() set_default_registry() - options = options if options is not None else CompileOptions() - set_oneflow_env_vars(options.oneflow) + options = options if options is not None else OneflowCompileOptions() + set_oneflow_env_vars(options) def wrap_module(module): if isinstance(module, OneflowDeployableModule): assert not module._is_raw_deployable_module - return module.__class__.from_existing( - module, options.dynamic, options.oneflow - ) + return module.__class__.from_existing(module, options.dynamic, options) else: return get_mixed_deployable_module(module.__class__)( - module, None, options.dynamic, options.oneflow + module, None, options.dynamic, options ) model = wrap_module(torch_module) diff --git a/src/onediff/infer_compiler/utils/oneflow_exec_mode.py b/src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py similarity index 100% rename from src/onediff/infer_compiler/utils/oneflow_exec_mode.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py diff --git a/src/onediff/infer_compiler/utils/online_quantization_utils.py b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py similarity index 70% rename from src/onediff/infer_compiler/utils/online_quantization_utils.py rename to 
src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py index a8fe99fd0..1a537dfc9 100644 --- a/src/onediff/infer_compiler/utils/online_quantization_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py @@ -1,11 +1,15 @@ def patch_input_adapter(in_args, in_kwargs): return in_args, in_kwargs + def online_quantize_model( - model, input_args, input_kwargs, - seed=1, inplace=True, + model, + input_args, + input_kwargs, + seed=1, + inplace=True, module_selector=lambda x: x, - quant_config = None, + quant_config=None, calibration_info=None, ): """Optimize the quantization pipeline. @@ -19,19 +23,23 @@ def online_quantize_model( OnlineQuantModule, create_quantization_calculator, ) + if getattr(quant_config, "quantization_calculator", None): calculator = quant_config.quantization_calculator else: calculator = create_quantization_calculator( - model, quant_config, module_selector, seed, + model, + quant_config, + module_selector, + seed, calibration_info=calibration_info, ) module = OnlineQuantModule(calculator, False, inplace=inplace) - in_args , in_kwargs = patch_input_adapter(input_args, input_kwargs) - quantized_model, info = module.quantize_with_calibration( - *in_args, **in_kwargs - ) + in_args, in_kwargs = patch_input_adapter(input_args, input_kwargs) + quantized_model, info = module.quantize_with_calibration(*in_args, **in_kwargs) status = module.collect_quantization_status(model, info) + for _, layer in quantized_model.named_modules(): + layer._disable_param_update = True return quantized_model, status @@ -42,14 +50,15 @@ def wrapper(self: "DeployableModule", *args, **kwargs): quant_config = self._deployable_module_quant_config if quant_config: torch_model, _ = online_quantize_model( - torch_model, args, kwargs, + torch_model, + args, + kwargs, module_selector=lambda x: x, quant_config=quant_config, inplace=True, ) - self._deployable_module_quant_config = None + self._deployable_module_quant_config = None output = func(self, *args, **kwargs) return output - return wrapper - \ No newline at end of file + return wrapper diff --git a/src/onediff/infer_compiler/utils/param_utils.py b/src/onediff/infer_compiler/backends/oneflow/param_utils.py similarity index 97% rename from src/onediff/infer_compiler/utils/param_utils.py rename to src/onediff/infer_compiler/backends/oneflow/param_utils.py index cbe71d003..c5f53440f 100644 --- a/src/onediff/infer_compiler/utils/param_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/param_utils.py @@ -3,7 +3,7 @@ import oneflow as flow from typing import List, Dict, Any, Union -from .log_utils import logger +from onediff.utils import logger def parse_device(args: List[Any], kwargs: Dict[str, Any]): @@ -80,8 +80,8 @@ def set_constant_folded_conv_attr( def generate_constant_folding_info( deployable_module, torch_module: torch.nn.Module = None ) -> Dict[str, flow.Tensor]: - removeprefix = lambda ss, prefix: ss[len(prefix):] if ss.startswith(prefix) else ss - + removeprefix = lambda ss, prefix: ss[len(prefix) :] if ss.startswith(prefix) else ss + # convert str like 'variable_transpose_model.input_blocks.10.0.in_layers.2.weight_239' # to 'input_blocks.10.0.in_layers.2.weight' def convert_var_name(s: str, prefix="variable_transpose_"): @@ -186,8 +186,9 @@ def forward_pre_check_and_update_state_hook(module, args): update_graph_with_constant_folding_info(module, constant_folding_info) setattr(module._torch_module, STATE_UPDATED_ATTR, False) + def removesuffix(s: str, suffix: str) -> str: if s.endswith(suffix): 
- return s[:len(s) - len(suffix)] + return s[: len(s) - len(suffix)] else: return s diff --git a/src/onediff/infer_compiler/transform/__init__.py b/src/onediff/infer_compiler/backends/oneflow/transform/__init__.py similarity index 100% rename from src/onediff/infer_compiler/transform/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/transform/__init__.py diff --git a/src/onediff/infer_compiler/transform/builtin_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py similarity index 98% rename from src/onediff/infer_compiler/transform/builtin_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py index 9c466a455..83a2b9dd6 100644 --- a/src/onediff/infer_compiler/transform/builtin_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py @@ -12,11 +12,12 @@ import oneflow as flow from .manager import transform_mgr -from ..utils.log_utils import logger -from ..utils.patch_for_diffusers import diffusers_checker +from onediff.utils import logger +from .patch_for_diffusers import diffusers_checker from ..import_tools.importer import is_need_mock from .patch_for_comfy import PatchForComfy + __all__ = [ "proxy_class", "ProxySubmodule", @@ -26,6 +27,7 @@ "default_converter", ] + def singledispatch_proxy(func): dispatcher = singledispatch(func) _warning_set = set() @@ -57,10 +59,12 @@ def wrapper(first_param, *args, **kwargs): def proxy_class(cls: type): try: out = transform_mgr.transform_cls(cls) - return out + return out except Exception as e: # If an exception occurs during transformation, print traceback for debugging - raise RuntimeError(f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}") + raise RuntimeError( + f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}" + ) def reverse_proxy_class(cls: type): @@ -447,7 +451,7 @@ def _(mod: types.BuiltinFunctionType, verbose=False): if mod_name is not None: m = importlib.import_module(mod_name) return getattr(m, mod.__name__) - + return default_converter(mod, verbose) diff --git a/src/onediff/infer_compiler/transform/custom_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py similarity index 96% rename from src/onediff/infer_compiler/transform/custom_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py index 0d0e71f59..feab6000f 100644 --- a/src/onediff/infer_compiler/transform/custom_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py @@ -6,7 +6,7 @@ from ..import_tools import import_module_from_path from .manager import transform_mgr from .builtin_transform import torch2oflow -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["register"] @@ -49,7 +49,7 @@ def import_module_safely(module_path, module_name): logger.warning(f"Failed to import {module_name} from {module_path}. 
{e=}") # compiler_registry_path - registry_path = Path(__file__).parents[3] / "infer_compiler_registry" + registry_path = Path(__file__).parents[5] / "infer_compiler_registry" if importlib.util.find_spec("diffusers") is not None: import_module_safely(registry_path / "register_diffusers", "register_diffusers") diff --git a/src/onediff/infer_compiler/transform/manager.py b/src/onediff/infer_compiler/backends/oneflow/transform/manager.py similarity index 99% rename from src/onediff/infer_compiler/transform/manager.py rename to src/onediff/infer_compiler/backends/oneflow/transform/manager.py index df8c65fa4..376b1e881 100644 --- a/src/onediff/infer_compiler/transform/manager.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/manager.py @@ -5,7 +5,7 @@ import logging from typing import Dict, List, Union from pathlib import Path -from ..utils.log_utils import logger +from onediff.utils import logger from ..import_tools.importer import LazyMocker __all__ = ["transform_mgr"] @@ -117,6 +117,7 @@ def transform_package(self, package_name): if importlib.util.find_spec("pydantic") is not None: import pydantic + if pydantic.VERSION < "2.5.2": logger.warning( f"Pydantic version {pydantic.VERSION} is too low, please upgrade to 2.5.2 or higher." @@ -126,5 +127,3 @@ def transform_package(self, package_name): MockEnableDisableMixin.hazard_list.append( "huggingface_hub.inference._text_generation" ) - - diff --git a/src/onediff/infer_compiler/transform/patch_for_comfy.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py similarity index 100% rename from src/onediff/infer_compiler/transform/patch_for_comfy.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py diff --git a/src/onediff/infer_compiler/utils/patch_for_diffusers.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py similarity index 95% rename from src/onediff/infer_compiler/utils/patch_for_diffusers.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py index 1de90c151..e5cb43cbf 100644 --- a/src/onediff/infer_compiler/utils/patch_for_diffusers.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py @@ -1,6 +1,6 @@ # TODO: remove this file to diffusers/src/infer_compiler_registry/register_diffusers from abc import ABC, abstractmethod -from .log_utils import logger +from onediff.utils import logger try: import diffusers diff --git a/src/onediff/infer_compiler/nexfort/__init__.py b/src/onediff/infer_compiler/backends/oneflow/utils/__init__.py similarity index 100% rename from src/onediff/infer_compiler/nexfort/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/utils/__init__.py diff --git a/src/onediff/infer_compiler/utils/cost_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py similarity index 99% rename from src/onediff/infer_compiler/utils/cost_util.py rename to src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py index 59a12a36a..4cb1575f5 100644 --- a/src/onediff/infer_compiler/utils/cost_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py @@ -2,7 +2,7 @@ import oneflow as flow import time import inspect -from .log_utils import logger +from onediff.utils import logger __all__ = ["cost_cnt", "cost_time"] diff --git a/src/onediff/infer_compiler/utils/version_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py similarity index 96% rename from src/onediff/infer_compiler/utils/version_util.py 
rename to src/onediff/infer_compiler/backends/oneflow/utils/version_util.py index 58dc6ab08..5e0d22a8e 100644 --- a/src/onediff/infer_compiler/utils/version_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py @@ -1,5 +1,5 @@ from importlib_metadata import version -from .log_utils import logger +from onediff.utils import logger def get_support_message(): diff --git a/src/onediff/infer_compiler/backends/registry.py b/src/onediff/infer_compiler/backends/registry.py index 46c1234cd..bbf0e24bf 100644 --- a/src/onediff/infer_compiler/backends/registry.py +++ b/src/onediff/infer_compiler/backends/registry.py @@ -28,23 +28,15 @@ def lookup_backend(compiler_fn): """Expand backend strings to functions""" if isinstance(compiler_fn, str): if compiler_fn not in _BACKENDS: - _lazy_import() + _lazy_import(compiler_fn) if compiler_fn not in _BACKENDS: raise RuntimeError(f"invalid backend {compiler_fn}") compiler_fn = _BACKENDS[compiler_fn] return compiler_fn -@functools.lru_cache(None) -def _lazy_import(): +def _lazy_import(backend_name): from .. import backends - def import_submodule(mod: types.ModuleType): - """ - Ensure all the files in a given submodule are imported - """ - for filename in sorted(os.listdir(os.path.dirname(cast(str, mod.__file__)))): - if filename.endswith(".py") and filename[0] != "_": - importlib.import_module(f"{mod.__name__}.{filename[:-3]}") - - import_submodule(backends) + backend_path = f"{backends.__name__}.{backend_name}" + importlib.import_module(backend_path) diff --git a/src/onediff/infer_compiler/core/__init__.py b/src/onediff/infer_compiler/core/__init__.py deleted file mode 100644 index 2c2324087..000000000 --- a/src/onediff/infer_compiler/core/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .deployable_module import DeployableModule -from .with_onediff_compile import compile, oneflow_compile diff --git a/src/onediff/infer_compiler/oneflow/__init__.py b/src/onediff/infer_compiler/oneflow/__init__.py deleted file mode 100644 index 6066ae13e..000000000 --- a/src/onediff/infer_compiler/oneflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .deployable_module import OneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/config.py b/src/onediff/infer_compiler/oneflow/config.py deleted file mode 100644 index 0e1d2f543..000000000 --- a/src/onediff/infer_compiler/oneflow/config.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -from typing import Optional -import dataclasses -from ..utils import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, -) - - -def init_default_env(): - # ONEFLOW_RUN_GRAPH_BY_VM must set here to enable nn.Graph init with vm run - os.environ.setdefault("ONEFLOW_RUN_GRAPH_BY_VM", "1") - os.environ.setdefault("ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_CSE", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_ROUND_TRIP", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_FORWARD_OPS", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", "1") - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL", "1") - os.environ.setdefault("ONEFLOW_MLIR_PREFER_NHWC", "1") - - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", "1") - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - 
os.environ.setdefault("ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", "0") - - os.environ.setdefault("ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", "-1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_QUANTIZATION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL_QUANT", "1") - os.environ.setdefault("ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP", "1") - # TODO: enable this will cause the failure of multi resolution warmup - # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -@dataclasses.dataclass -class OneFlowCompilerConfig: - run_graph_by_vm: Optional[bool] = None - graph_delay_variable_op_execution: Optional[bool] = None - - mlir_cse: Optional[bool] = None - mlir_enable_inference_optimization: Optional[bool] = None - mlir_enable_round_trip: Optional[bool] = None - mlir_fuse_forward_ops: Optional[bool] = None - mlir_fuse_ops_with_backward_impl: Optional[bool] = None - mlir_group_matmul: Optional[bool] = None - mlir_prefer_nhwc: Optional[bool] = None - mlir_fuse_kernel_launch: Optional[bool] = None - - kernel_enable_cuda_graph: Optional[bool] = None - kernel_enable_fused_conv_bias: Optional[bool] = None - kernel_enable_fused_linear: Optional[bool] = None - kernel_conv_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_conv_enable_cutlass_impl: Optional[bool] = None - kernel_gemm_enable_cutlass_impl: Optional[bool] = None - kernel_glu_enable_dual_gemm_impl: Optional[bool] = None - kernel_glu_enable_y_gemm_impl: Optional[bool] = None - kernel_glu_quant_enable_dual_gemm_impl: Optional[bool] = None - - conv_allow_half_precision_accumulation: Optional[bool] = None - matmul_allow_half_precision_accumulation: Optional[bool] = None - linear_embedding_skip_init: Optional[bool] = None - attention_allow_half_precision_accumulation: Optional[bool] = None - attention_allow_half_precision_score_accumulation_max_m: Optional[int] = None - attention_allow_quantization: Optional[bool] = None - conv2d_kernel_enable_tuning_warmup: Optional[bool] = None - - attr2env_var = { - "run_graph_by_vm": "ONEFLOW_RUN_GRAPH_BY_VM", - "graph_delay_variable_op_execution": "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", - "mlir_cse": "ONEFLOW_MLIR_CSE", - "mlir_enable_inference_optimization": "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", - "mlir_enable_round_trip": "ONEFLOW_MLIR_ENABLE_ROUND_TRIP", - "mlir_fuse_forward_ops": "ONEFLOW_MLIR_FUSE_FORWARD_OPS", - "mlir_fuse_ops_with_backward_impl": "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", - "mlir_group_matmul": "ONEFLOW_MLIR_GROUP_MATMUL", - "mlir_prefer_nhwc": "ONEFLOW_MLIR_PREFER_NHWC", - "mlir_fuse_kernel_launch": 
"ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", - "kernel_enable_cuda_graph": "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", - "kernel_enable_fused_conv_bias": "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", - "kernel_enable_fused_linear": "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", - "kernel_conv_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_gemm_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_conv_enable_cutlass_impl": "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", - "kernel_gemm_enable_cutlass_impl": "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", - "kernel_glu_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", - "kernel_glu_enable_y_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", - "kernel_glu_quant_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", - "conv_allow_half_precision_accumulation": "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", - "matmul_allow_half_precision_accumulation": "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", - "linear_embedding_skip_init": "ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", - "attention_allow_half_precision_accumulation": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", - "attention_allow_half_precision_score_accumulation_max_m": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", - "conv2d_kernel_enable_tuning_warmup":'ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP', - } - - def __post_init__(self): - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - for name in self.attr2env_var: - if fields[name].type in (bool, Optional[bool]): - super().__setattr__( - name, parse_boolean_from_env(self.attr2env_var[name]) - ) - elif fields[name].type in (int, Optional[int]): - super().__setattr__( - name, parse_integer_from_env(self.attr2env_var[name]) - ) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - super().__setattr__("_initialized", True) - - def __setattr__(self, name, value): - super().__setattr__(name, value) - if getattr(self, "_initialized", False) and name in self.attr2env_var: - fields = dataclasses.fields(self) - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - if fields[name].type in (bool, Optional[bool]): - set_boolean_env_var(self.attr2env_var[name], value) - elif fields[name].type in (int, Optional[int]): - set_integer_env_var(self.attr2env_var[name], value) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - -init_default_env() -oneflow_compiler_config = OneFlowCompilerConfig() diff --git a/src/onediff/infer_compiler/oneflow/utils.py b/src/onediff/infer_compiler/oneflow/utils.py deleted file mode 100644 index 4a5e899aa..000000000 --- a/src/onediff/infer_compiler/oneflow/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -from functools import wraps - -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from ..utils.log_utils import logger -from .dual_module import DualModule - - -@torch2oflow.register -def _(mod: DualModule, verbose=False): - return torch2oflow(mod._torch_module, verbose) - - -def handle_deployable_exception(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - if transform_mgr.debug_mode: - return func(self, *args, **kwargs) - else: - try: - return func(self, *args, **kwargs) - except Exception as e: - logger.error(f"Exception in {func.__name__}: {e=}") - logger.warning("Recompile oneflow 
module ...") - del self._deployable_module_model.oneflow_module - self._deployable_module_dpl_graph = None - return func(self, *args, **kwargs) - - return wrapper - - -def get_mixed_dual_module(module_cls): - if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: - return module_cls - - class MixedDualModule(DualModule, module_cls): - def __init__(self, torch_module, oneflow_module): - while isinstance(torch_module, DualModule): - torch_module = torch_module._torch_module - DualModule.__init__(self, torch_module, oneflow_module) - - def _get_name(self) -> str: - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedDualModule - - -# Return a OneflowDeployableModule that using module_cls as it's parent class. -def get_mixed_deployable_module(module_cls): - from .deployable_module import OneflowDeployableModule - - class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): - def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): - OneflowDeployableModule.__init__( - self, torch_module, oneflow_module, dynamic, options - ) - self._is_raw_deployable_module = False - - @classmethod - def from_existing(cls, existing_module, dynamic=True, options=None): - torch_module = existing_module._deployable_module_model._torch_module - oneflow_module = existing_module._deployable_module_model._oneflow_module - instance = cls(torch_module, oneflow_module, dynamic, options) - instance._deployable_module_dpl_graph = None - if hasattr(existing_module, "_deployable_module_dpl_graph"): - instance._deployable_module_dpl_graph = ( - existing_module._deployable_module_dpl_graph - ) - return instance - - def _get_name(self): - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedOneflowDeployableModule - - -def get_oneflow_graph(model, size=9, dynamic_graph=True): - from .graph import OneflowGraph - - g = OneflowGraph(model) - g._dynamic_input_graph_cache.set_cache_size(size) - g._dynamic_input_graph_cache.enable_shared(dynamic_graph) - return g diff --git a/src/onediff/infer_compiler/utils/__init__.py b/src/onediff/infer_compiler/utils/__init__.py deleted file mode 100644 index 076b41bcd..000000000 --- a/src/onediff/infer_compiler/utils/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from .env_var import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, - set_oneflow_env_vars, - set_oneflow_default_env_vars, - set_nexfort_env_vars, - set_nexfort_default_env_vars, - set_default_env_vars, -) -from .model_inplace_assign import TensorInplaceAssign -from .version_util import ( - get_support_message, - is_quantization_enabled, - is_community_version, -) -from .options import * diff --git a/src/onediff/infer_compiler/utils/options.py b/src/onediff/infer_compiler/utils/options.py deleted file mode 100644 index f96e83e37..000000000 --- a/src/onediff/infer_compiler/utils/options.py +++ /dev/null @@ -1,92 +0,0 @@ -import dataclasses -from typing import Dict -import torch - - -@dataclasses.dataclass -class OneflowCompileOptions: - use_graph: bool = True - debug_level: int = -1 - max_cached_graph_size: int = 9 - graph_file: str = None - graph_file_device: torch.device = None - - # Optimization related environment variables - run_graph_by_vm: bool = None - graph_delay_variable_op_execution: bool = None - - conv_allow_half_precision_accumulation: bool = None - 
matmul_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_score_accumulation_max_m: int = None - attention_allow_quantization: bool = None - - mlir_cse: bool = None - mlir_enable_inference_optimization: bool = None - mlir_enable_round_trip: bool = None - mlir_fuse_forward_ops: bool = None - mlir_fuse_ops_with_backward_impl: bool = None - mlir_group_matmul: bool = None - mlir_prefer_nhwc: bool = None - mlir_fuse_kernel_launch: bool = None - - kernel_enable_cuda_graph: bool = None - kernel_enable_fused_conv_bias: bool = None - kernel_enable_fused_linear: bool = None - kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None - kernel_enable_conv2d_tuning_warmup: bool = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None - kernel_conv_enable_cutlass_impl: bool = None - kernel_gemm_enable_cutlass_impl: bool = None - kernel_glu_enable_dual_gemm_impl: bool = None - kernel_glu_enable_y_gemm_impl: bool = None - kernel_glu_quant_enable_dual_gemm_impl: bool = None - - -@dataclasses.dataclass -class NexfortInductorCompileOptions: - disable: bool = False - mode: str = None - options: Dict = dataclasses.field(default_factory=dict) - - -@dataclasses.dataclass -class NexfortCompileOptions: - memory_format: torch.memory_format - fuse_qkv_projections: bool - inductor: NexfortInductorCompileOptions - - def __init__( - self, - memory_format=torch.channels_last, - fuse_qkv_projections=True, - inductor=None, - ): - if isinstance(memory_format, str): - memory_format = getattr(torch, memory_format) - self.memory_format = memory_format - self.fuse_qkv_projections = fuse_qkv_projections - self.inductor = ( - inductor if inductor is not None else NexfortInductorCompileOptions() - ) - - -@dataclasses.dataclass -class CompileOptions: - # common options - dynamic: bool - - # oneflow specific options - oneflow: OneflowCompileOptions - - # nexfort specific options - nexfort: NexfortCompileOptions - - def __init__(self, dynamic=True, oneflow=None, nexfort=None): - self.dynamic = dynamic - self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() - self.nexfort = nexfort if nexfort is not None else NexfortCompileOptions() - - -# a global default compile options -_GLOBAL_compile_options = CompileOptions() diff --git a/src/onediff/optimization/attention_processor.py b/src/onediff/optimization/attention_processor.py index 22650ab62..c57dcc602 100644 --- a/src/onediff/optimization/attention_processor.py +++ b/src/onediff/optimization/attention_processor.py @@ -84,7 +84,7 @@ def __call__( hidden_states = flow.bmm(attention_probs, value) hidden_states = attn.batch_to_head_dim(hidden_states) else: - from ..infer_compiler.utils import ( + from onediff.utils import ( parse_boolean_from_env, set_boolean_env_var, ) @@ -123,7 +123,7 @@ def __call__( try: - from onediff.infer_compiler.transform import register + from onediff.infer_compiler.backends.oneflow.transform import register def convert_fused_self_attn_processor( mod: FusedSelfAttnProcessor, verbose=True @@ -132,4 +132,4 @@ def convert_fused_self_attn_processor( register(torch2oflow_funcs=convert_fused_self_attn_processor) except: - print("Skip onediff.infer_compiler.transform.register") + print("Skip onediff.infer_compiler.backends.oneflow.transform.register") diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 24a104dfc..9a00b883b 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ 
b/src/onediff/optimization/quant_optimizer.py @@ -2,11 +2,13 @@ import torch import torch.nn as nn from copy import deepcopy -from ..infer_compiler.utils.log_utils import logger -from ..infer_compiler.utils.version_util import is_quantization_enabled -from ..infer_compiler.utils.cost_util import cost_cnt -from ..infer_compiler.utils.module_operations import modify_sub_module -from ..infer_compiler.transform.manager import transform_mgr +from onediff.utils import logger +from onediff.infer_compiler.backends.oneflow.utils.version_util import ( + is_quantization_enabled, +) +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.transform.manager import transform_mgr +from onediff.torch_utils.module_operations import modify_sub_module __all__ = ["quantize_model", "varify_can_use_quantization"] @@ -107,4 +109,3 @@ def apply_quantization_to_modules(quantizable_modules): ) return model - diff --git a/src/onediff/quantization/load_quantized_model.py b/src/onediff/quantization/load_quantized_model.py index 9500aa314..913466137 100644 --- a/src/onediff/quantization/load_quantized_model.py +++ b/src/onediff/quantization/load_quantized_model.py @@ -1,22 +1,30 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline -import argparse +import argparse import torch from onediff.infer_compiler import oneflow_compile + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height", type= int,default=1024) - parser.add_argument("--width", type= int, default=1024) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, default=30) parser.add_argument("--quantized_model", type=str, required=True) return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_quantized( - AutoPipelineForText2Image, args.quantized_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True + AutoPipelineForText2Image, + args.quantized_model, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe = pipe.to("cuda") diff --git a/src/onediff/quantization/quant_pipeline_test.py b/src/onediff/quantization/quant_pipeline_test.py index c68589fbc..a23efd134 100644 --- a/src/onediff/quantization/quant_pipeline_test.py +++ b/src/onediff/quantization/quant_pipeline_test.py @@ -1,15 +1,19 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline import torch -import argparse +import argparse def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--floatting_model_path", default="runwayml/stable-diffusion-v1-5") - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height",type=int, default=1024) - parser.add_argument("--width", type=int,default=1024) + parser.add_argument( + "--floatting_model_path", default="runwayml/stable-diffusion-v1-5" + ) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, 
default=30) parser.add_argument("--conv_compute_density_threshold", type=int, default=900) parser.add_argument("--linear_compute_density_threshold", type=int, default=300) @@ -20,10 +24,15 @@ def parse_args(): parser.add_argument("--quantized_model", default="./quantized_model") return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_pretrained( - AutoPipelineForText2Image, args.floatting_model_path, torch_dtype=torch.float16, variant="fp16", use_safetensors=True + AutoPipelineForText2Image, + args.floatting_model_path, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe.to("cuda") @@ -34,13 +43,15 @@ def parse_args(): num_inference_steps=args.num_inference_steps, ) -pipe.quantize(**pipe_kwargs, +pipe.quantize( + **pipe_kwargs, conv_compute_density_threshold=args.conv_compute_density_threshold, linear_compute_density_threshold=args.linear_compute_density_threshold, conv_ssim_threshold=args.conv_ssim_threshold, linear_ssim_threshold=args.linear_ssim_threshold, save_as_float=args.save_as_float, plot_calibrate_info=False, - cache_dir=args.cache_dir) + cache_dir=args.cache_dir +) pipe.save_quantized(args.quantized_model, safe_serialization=True) diff --git a/src/onediff/quantization/quantize_utils.py b/src/onediff/quantization/quantize_utils.py index 678787586..9b0b58022 100644 --- a/src/onediff/quantization/quantize_utils.py +++ b/src/onediff/quantization/quantize_utils.py @@ -17,7 +17,7 @@ def load_calibration_and_quantize_pipeline(calibration_path, pipe): store = CalibrationStorage() calibrate_info = store.load_from_file(file_path=calibration_path) - + for sub_module_name, sub_calibrate_info in calibrate_info.items(): replace_sub_module_with_quantizable_module( pipe.unet, diff --git a/src/onediff/torch_utils/__init__.py b/src/onediff/torch_utils/__init__.py new file mode 100644 index 000000000..5a82505fa --- /dev/null +++ b/src/onediff/torch_utils/__init__.py @@ -0,0 +1 @@ +from .model_inplace_assign import TensorInplaceAssign diff --git a/src/onediff/infer_compiler/utils/model_inplace_assign.py b/src/onediff/torch_utils/model_inplace_assign.py similarity index 98% rename from src/onediff/infer_compiler/utils/model_inplace_assign.py rename to src/onediff/torch_utils/model_inplace_assign.py index f61276f5b..c8edc6a6d 100644 --- a/src/onediff/infer_compiler/utils/model_inplace_assign.py +++ b/src/onediff/torch_utils/model_inplace_assign.py @@ -10,7 +10,7 @@ class TensorInplaceAssign: r""" This class is used as a context manager, instantiated with either a `torch.nn.Module` or - `onediff.infer_compiler.deployable_module.DeployableModule` during initialization. + `onediff.infer_compiler.backends.deployable_module.DeployableModule` during initialization. Within the context manager, all Tensors associated with the provided module will be transformed into AutoInplaceCopyTensor. After transformed, assignments to Tensor.data are modified to in-place copying. 
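The `TensorInplaceAssign` context manager relocated above now lives under `onediff.torch_utils`. A minimal usage sketch based on its docstring; the toy `nn.Linear` module and the zero-fill assignment below are illustrative assumptions, not part of this diff:

import torch
import torch.nn as nn

from onediff.torch_utils import TensorInplaceAssign

linear = nn.Linear(4, 4)
storage_before = linear.weight.data_ptr()

# Inside the context, assignments to Tensor.data are turned into in-place
# copies, so the parameter should keep its original storage -- useful when an
# already-compiled graph still references those buffers.
with TensorInplaceAssign(linear):
    linear.weight.data = torch.zeros(4, 4)

assert linear.weight.data_ptr() == storage_before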
diff --git a/src/onediff/infer_compiler/utils/module_operations.py b/src/onediff/torch_utils/module_operations.py similarity index 99% rename from src/onediff/infer_compiler/utils/module_operations.py rename to src/onediff/torch_utils/module_operations.py index c31856227..04cac3e58 100644 --- a/src/onediff/infer_compiler/utils/module_operations.py +++ b/src/onediff/torch_utils/module_operations.py @@ -16,7 +16,7 @@ def get_sub_module(module, sub_module_name) -> nn.Module: """ if sub_module_name == "": return module - + parts = sub_module_name.split(".") current_module = module diff --git a/src/onediff/utils/__init__.py b/src/onediff/utils/__init__.py new file mode 100644 index 000000000..631812a59 --- /dev/null +++ b/src/onediff/utils/__init__.py @@ -0,0 +1,7 @@ +from .log_utils import logger +from .env_var import ( + parse_boolean_from_env, + set_boolean_env_var, + parse_integer_from_env, + set_integer_env_var, +) diff --git a/src/onediff/utils/env_var.py b/src/onediff/utils/env_var.py new file mode 100644 index 000000000..23b6e749b --- /dev/null +++ b/src/onediff/utils/env_var.py @@ -0,0 +1,31 @@ +import os +from typing import Optional + + +def parse_boolean_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + env_var = env_var.lower() + return env_var in ("1", "true", "yes", "on", "y") + + +def set_boolean_env_var(env_var: str, val: Optional[bool]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = "1" if val else "0" + + +def parse_integer_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + return int(env_var) + + +def set_integer_env_var(env_var: str, val: Optional[int]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = str(int(val)) diff --git a/src/onediff/infer_compiler/utils/log_utils.py b/src/onediff/utils/log_utils.py similarity index 100% rename from src/onediff/infer_compiler/utils/log_utils.py rename to src/onediff/utils/log_utils.py diff --git a/tests/comfy-docker-compose.yml b/tests/comfy-docker-compose.yml index a8a7a7171..99f4d7f4d 100644 --- a/tests/comfy-docker-compose.yml +++ b/tests/comfy-docker-compose.yml @@ -33,6 +33,9 @@ services: - $HOME/test-container-cache-${CONTAINER_NAME}/dot-cache:/root/.cache - /share_nfs:/share_nfs:ro - ${PWD}/${COMFYUI_SRC_DIR}:/app/ComfyUI + - /share_nfs/hf_models/comfyui_resources/custom_nodes/ComfyUI_IPAdapter_plus:/app/ComfyUI/custom_nodes/ComfyUI_IPAdapter_plus + - /share_nfs/hf_models/comfyui_resources/input/input_image_vermeer.png:/app/ComfyUI/input/input_image_vermeer.png:ro + - /share_nfs/hf_models/comfyui_resources/input/a_car.png:/app/ComfyUI/input/a_car.png:ro - ${PWD}/onediff_comfy_nodes:/app/ComfyUI/custom_nodes/onediff_comfy_nodes - ${SDXL_BASE}:/app/ComfyUI/models/checkpoints/sd_xl_base_1.0.safetensors:ro - ${UNET_INT8}:/app/ComfyUI/models/unet_int8/unet_int8:ro diff --git a/tests/comfyui/extra_model_paths.yaml b/tests/comfyui/extra_model_paths.yaml index 88abe1cdd..007a395e9 100644 --- a/tests/comfyui/extra_model_paths.yaml +++ b/tests/comfyui/extra_model_paths.yaml @@ -9,13 +9,14 @@ comfyui: # checkpoints: /home/fengwen/workspace/test_checkpoints checkpoints: /share_nfs/hf_models/comfyui_resources/checkpoints clip: models/clip/ - clip_vision: models/clip_vision/ + clip_vision: /share_nfs/hf_models/comfyui_resources/clip_vision configs: models/configs/ controlnet: models/controlnet/ embeddings: models/embeddings/ - loras: 
models/loras/ + loras: /share_nfs/hf_models/comfyui_resources/loras/ upscale_models: models/upscale_models/ vae: models/vae/ + ipadapter: /share_nfs/hf_models/comfyui_resources/ipadapter #other_ui: # base_path: path/to/ui diff --git a/tests/comfyui/test_by_ui.py b/tests/comfyui/test_by_ui.py index 6b8253222..9ee92836a 100644 --- a/tests/comfyui/test_by_ui.py +++ b/tests/comfyui/test_by_ui.py @@ -174,9 +174,14 @@ def launch_prompt(driver): print(f"launch the queue prompt (timeout: {args.timeout}s) ...") launch_and_wait(driver, timeout=args.timeout) + + duration = time.time() - start_time print( - f"{args.workflow} has finished, time elapsed: {time.time() - start_time:.1f}" + f"{args.workflow} has finished, time elapsed: {duration:.1f}" ) + + if duration < 2: + raise ValueError("Execution duration is too short, possible error in workflow execution") print(f"check if error occurs...") check_error_occurs(driver) diff --git a/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json b/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json index 55694121d..1af8e9adb 100644 --- a/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json +++ b/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json @@ -40,44 +40,6 @@ "text, watermark" ] }, - { - "id": 6, - "type": "CLIPTextEncode", - "pos": [ - 515, - 318 - ], - "size": { - "0": 422.84503173828125, - "1": 164.31304931640625 - }, - "flags": {}, - "order": 3, - "mode": 0, - "inputs": [ - { - "name": "clip", - "type": "CLIP", - "link": 3 - } - ], - "outputs": [ - { - "name": "CONDITIONING", - "type": "CONDITIONING", - "links": [ - 4 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "CLIPTextEncode" - }, - "widgets_values": [ - "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," - ] - }, { "id": 3, "type": "KSampler", @@ -129,7 +91,7 @@ "Node name for S&R": "KSampler" }, "widgets_values": [ - 156680208700286, + 371953008319175, "randomize", 20, 8, @@ -179,51 +141,29 @@ } }, { - "id": 4, - "type": "CheckpointLoaderSimple", + "id": 9, + "type": "SaveImage", "pos": [ - 100, + 1765.2780151367188, 130 ], "size": { - "0": 315, - "1": 98 + "0": 241.92205810546875, + "1": 270 }, "flags": {}, - "order": 0, + "order": 8, "mode": 0, - "outputs": [ - { - "name": "MODEL", - "type": "MODEL", - "links": [ - 10 - ], - "slot_index": 0 - }, - { - "name": "CLIP", - "type": "CLIP", - "links": [ - 3, - 5 - ], - "slot_index": 1 - }, + "inputs": [ { - "name": "VAE", - "type": "VAE", - "links": [ - 8 - ], - "slot_index": 2 + "name": "images", + "type": "IMAGE", + "link": 9 } ], - "properties": { - "Node name for S&R": "CheckpointLoaderSimple" - }, + "properties": {}, "widgets_values": [ - "sd_xl_base_1.0.safetensors" + "model-speedup" ] }, { @@ -238,7 +178,7 @@ "1": 106 }, "flags": {}, - "order": 1, + "order": 0, "mode": 0, "outputs": [ { @@ -254,47 +194,55 @@ "Node name for S&R": "EmptyLatentImage" }, "widgets_values": [ - 1024, - 1024, + 512, + 512, 1 ] }, { - "id": 9, - "type": "SaveImage", + "id": 12, + "type": "ModelGraphSaver", "pos": [ - 1765.2780151367188, - 130 + 1457, + 353 ], "size": { - "0": 241.92205810546875, - "1": 58 + "0": 315, + "1": 78 }, "flags": {}, - "order": 8, + "order": 7, "mode": 0, "inputs": [ { - "name": "images", - "type": "IMAGE", - "link": 9 + "name": "samples", + "type": "LATENT", + "link": 13, + "slot_index": 0 + }, + { + "name": "model", + "type": "MODEL", + "link": 14 } ], - "properties": {}, + "properties": { + "Node name for S&R": "ModelGraphSaver" + }, "widgets_values": [ - 
"model-speedup" + "sd1.5-unet" ] }, { "id": 10, "type": "ModelSpeedup", "pos": [ - 515, - 130 + 482, + 133 ], "size": { "0": 315, - "1": 58 + "1": 78 }, "flags": {}, "order": 2, @@ -305,6 +253,11 @@ "type": "MODEL", "link": 10, "slot_index": 0 + }, + { + "name": "custom_booster", + "type": "CUSTOM_BOOSTER", + "link": null } ], "outputs": [ @@ -323,41 +276,93 @@ "Node name for S&R": "ModelSpeedup" }, "widgets_values": [ - "enable" + true ] }, { - "id": 12, - "type": "ModelGraphSaver", + "id": 6, + "type": "CLIPTextEncode", "pos": [ - 1457, - 353 + 564, + 357 ], "size": { - "0": 315, - "1": 78 + "0": 422.84503173828125, + "1": 164.31304931640625 }, "flags": {}, - "order": 7, + "order": 3, "mode": 0, "inputs": [ { - "name": "samples", - "type": "LATENT", - "link": 13, + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 4 + ], "slot_index": 0 - }, + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 100, + 130 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ { - "name": "model", + "name": "MODEL", "type": "MODEL", - "link": 14 + "links": [ + 10 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 8 + ], + "slot_index": 2 } ], "properties": { - "Node name for S&R": "ModelGraphSaver" + "Node name for S&R": "CheckpointLoaderSimple" }, "widgets_values": [ - "sdxl-unet" + "sd_xl_base_1.0.safetensors" ] } ], @@ -461,6 +466,14 @@ ], "groups": [], "config": {}, - "extra": {}, + "extra": { + "ds": { + "scale": 0.8390545288824038, + "offset": { + "0": -249.2752477861829, + "1": 56.895086690219436 + } + } + }, "version": 0.4 } \ No newline at end of file diff --git a/tests/convert_torch_to_of/test_patch_for_compiling.py b/tests/convert_torch_to_of/test_patch_for_compiling.py index 6df8ac758..21844a4fa 100644 --- a/tests/convert_torch_to_of/test_patch_for_compiling.py +++ b/tests/convert_torch_to_of/test_patch_for_compiling.py @@ -6,7 +6,7 @@ """ import pytest import numpy as np -from onediff.infer_compiler.utils.patch_for_compiler import FakeCuda +from onediff.infer_compiler.backends.oneflow.import_tools.patch_for_compiler import FakeCuda @pytest.mark.parametrize("batch_size", [8]) diff --git a/tests/convert_torch_to_of/test_torch2of_demo.py b/tests/convert_torch_to_of/test_torch2of_demo.py index eabb63f1e..df4eb5202 100644 --- a/tests/convert_torch_to_of/test_torch2of_demo.py +++ b/tests/convert_torch_to_of/test_torch2of_demo.py @@ -9,7 +9,7 @@ import unittest import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr class PyTorchModel(torch.nn.Module): diff --git a/tests/sd-webui/cat.png b/tests/sd-webui/cat.png new file mode 100644 index 000000000..e610e7081 Binary files /dev/null and b/tests/sd-webui/cat.png differ diff --git a/tests/sd-webui/test_api.py b/tests/sd-webui/test_api.py new file mode 100644 index 000000000..21c1f0719 --- /dev/null +++ b/tests/sd-webui/test_api.py @@ -0,0 +1,95 @@ +import base64 +import pytest +import requests +from pathlib import Path + +def encode_file_to_base64(path): + with 
open(path, "rb") as file: + return base64.b64encode(file.read()).decode("utf-8") + +def post_request(url, data): + response = requests.post(url, json=data) + assert response.status_code == 200 + return response + +@pytest.fixture() +def base_url(): + return f"http://127.0.0.1:7860" + +@pytest.fixture() +def url_txt2img(base_url): + return f"{base_url}/sdapi/v1/txt2img" + +@pytest.fixture() +def url_img2img(base_url): + return f"{base_url}/sdapi/v1/img2img" + +@pytest.fixture() +def url_set_config(base_url): + return f"{base_url}/sdapi/v1/options" + +@pytest.fixture() +def simple_txt2img_request(): + return { + "prompt": "1girl", + "negative_prompt": "", + "seed": 1, + "steps": 20, + "width": 1024, + "height": 1024, + "cfg_scale": 7, + "n_iter": 1, + "batch_size": 1, + + # Enable OneDiff speed up + "script_name": "onediff_diffusion_model", + + "script_args" : [ + False, # quantization + None, # graph_checkpoint + "", # saved_graph_name + ], + } + +def test_txt2img_onediff(url_txt2img, simple_txt2img_request): + data = simple_txt2img_request + post_request(url_txt2img, data) + +def test_img2img_onediff(url_img2img, simple_txt2img_request): + img_path = str(Path(__file__).parent / "cat.png") + init_images = {"init_images": [encode_file_to_base64(img_path)]} + data = {**simple_txt2img_request, **init_images} + post_request(url_img2img, data) + +def test_txt2img_onediff_quant(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + True, # quantization + None, # graph_checkpoint + "saved_graph", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) + +def test_txt2img_onediff_save_graph(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + False, # quantization + None, # graph_checkpoint + "saved_graph", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) + +def test_txt2img_onediff_load_graph(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + False, # quantization + "saved_graph", # graph_checkpoint + "", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py index 96e686b2a..28a711404 100644 --- a/tests/test_dual_module_list.py +++ b/tests/test_dual_module_list.py @@ -1,6 +1,6 @@ import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import torch import torch.nn as nn import oneflow as flow @@ -39,7 +39,7 @@ def forward(self, x): assert np.allclose(y_torch.detach().cpu(), y_oneflow.detach().cpu(), 1e-03, 1e-03) -from onediff.infer_compiler.oneflow.dual_module import DualModule, DualModuleList +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule, DualModuleList assert isinstance(m.linears, DualModuleList) diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py index 36d92f9fc..00a2fbce5 100644 --- a/tests/test_quantize_custom_model.py +++ b/tests/test_quantize_custom_model.py @@ -7,8 +7,8 @@ from torch import nn from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.transform import register +from 
diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py
index 96e686b2a..28a711404 100644
--- a/tests/test_dual_module_list.py
+++ b/tests/test_dual_module_list.py
@@ -1,6 +1,6 @@
 import numpy as np
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 import torch
 import torch.nn as nn
 import oneflow as flow
@@ -39,7 +39,7 @@ def forward(self, x):
 
 assert np.allclose(y_torch.detach().cpu(), y_oneflow.detach().cpu(), 1e-03, 1e-03)
 
-from onediff.infer_compiler.oneflow.dual_module import DualModule, DualModuleList
+from onediff.infer_compiler.backends.oneflow.dual_module import DualModule, DualModuleList
 
 assert isinstance(m.linears, DualModuleList)
diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py
index 36d92f9fc..00a2fbce5 100644
--- a/tests/test_quantize_custom_model.py
+++ b/tests/test_quantize_custom_model.py
@@ -7,8 +7,8 @@
 from torch import nn
 
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import register
-from onediff.infer_compiler.utils import is_community_version
+from onediff.infer_compiler.backends.oneflow.transform import register
+from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version
 
 is_community = is_community_version()
 onediff_quant_spec = importlib.util.find_spec("onediff_quant")
diff --git a/tests/webui-docker-compose.yml b/tests/webui-docker-compose.yml
new file mode 100644
index 000000000..6486726fb
--- /dev/null
+++ b/tests/webui-docker-compose.yml
@@ -0,0 +1,56 @@
+version: "3.8"
+
+services:
+  onediff-test:
+    container_name: ${CONTAINER_NAME}
+    image: ${ACR_ORG}/${MATRIX_IMAGE}
+    command: sleep 5400
+    privileged: true
+    shm_size: 8g
+    network_mode: host
+    pids_limit: 2000
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp=unconfined
+    environment:
+      HF_HUB_OFFLINE: "1"
+      ONEFLOW_MLIR_ENABLE_TIMING: "1"
+      ONEFLOW_MLIR_PRINT_STATS: "1"
+      CI: "1"
+      SILICON_ONEDIFF_LICENSE_KEY: ${SILICON_ONEDIFF_LICENSE_KEY}
+
+      INDEX_URL: "https://pypi.tuna.tsinghua.edu.cn/simple"
+      CLIP_PACKAGE: "git+file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/CLIP"
+      OPENCLIP_PACKAGE: "git+file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/open_clip"
+      ASSETS_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/stable-diffusion-webui-assets"
+      STABLE_DIFFUSION_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/stable-diffusion-stability-ai"
+      STABLE_DIFFUSION_XL_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/generative-models"
+      K_DIFFUSION_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/k-diffusion"
+      BLIP_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/BLIP"
+
+      ASSETS_COMMIT_HASH: ${ASSETS_COMMIT_HASH}
+      STABLE_DIFFUSION_COMMIT_HASH: ${STABLE_DIFFUSION_COMMIT_HASH}
+      STABLE_DIFFUSION_XL_COMMIT_HASH: ${STABLE_DIFFUSION_XL_COMMIT_HASH}
+      K_DIFFUSION_COMMIT_HASH: ${K_DIFFUSION_COMMIT_HASH}
+      BLIP_COMMIT_HASH: ${BLIP_COMMIT_HASH}
+
+      SAFE_DIRECTORIES: |
+        CLIP
+        open_clip
+        stable-diffusion-webui-assets
+        stable-diffusion-stability-ai
+        generative-models
+        k-diffusion
+        BLIP
+
+
+    volumes:
+      - $HOME/test-container-cache-${CONTAINER_NAME}/dot-local:/root/.local
+      - $HOME/test-container-cache-${CONTAINER_NAME}/dot-cache:/root/.cache
+      - /share_nfs:/share_nfs:ro
+      - ${PWD}/${WEBUI_SRC_DIR}:/app/${WEBUI_SRC_DIR}
+      - ${PWD}/onediff_sd_webui_extensions:/app/${WEBUI_SRC_DIR}/extensions/onediff_sd_webui_extensions
+      - $PWD:/src/onediff
+    working_dir: /src/onediff
+    restart: "no"
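The *_REPO and *_COMMIT_HASH variables exist because webui's launcher resolves each vendored dependency from the environment before falling back to GitHub, so the locally packed checkouts are used instead of network clones (HF_HUB_OFFLINE serves the same goal for model downloads). A rough sketch of that resolution step (the real logic lives in webui's modules/launch_utils.py; names here are approximate, not an exact API):

```python
import os

def resolve_repo(env_key: str, default_url: str) -> str:
    # CI override, e.g. STABLE_DIFFUSION_REPO=file:///app/.../stable-diffusion-stability-ai
    return os.environ.get(env_key, default_url)

url = resolve_repo(
    "STABLE_DIFFUSION_REPO",
    "https://github.com/Stability-AI/stablediffusion.git",
)
commit = os.environ.get("STABLE_DIFFUSION_COMMIT_HASH")  # pin checked out after clone
print(f"would fetch {url} at {commit or 'default branch'}")
```

SAFE_DIRECTORIES is a custom list consumed inside the test container, presumably to register each vendored checkout with git's safe.directory mechanism, since the repos are owned by a different user inside the container than the one that packed them.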