diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index d979dd8ce..3cb681e33 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -15,11 +15,22 @@ env: REGION_ID: cn-beijing ACR_ORG: registry.cn-beijing.aliyuncs.com/oneflow COMFYUI_SRC_DIR: ComfyUI + WEBUI_SRC_DIR: stable-diffusion-webui + WEBUI_DEPENDENCIES_SUBDIR: repos SDXL_BASE: /share_nfs/hf_models/sd_xl_base_1.0.safetensors UNET_INT8: /share_nfs/hf_models/unet_int8 CONTROL_LORA_OPENPOSEXL2_RANK256: /share_nfs/hf_models/controlnet/control-lora-openposeXL2-rank256.safetensors SELENIUM_CONTAINER_NAME: selenium-test SELENIUM_IMAGE: standalone-chrome:119.0-chromedriver-119.0-grid-4.15.0-20231129 + + # For git repos required by webui + ASSETS_COMMIT_HASH: 6f7db241d2f8ba7457bac5ca9753331f0c266917 + STABLE_DIFFUSION_COMMIT_HASH: cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf + STABLE_DIFFUSION_XL_COMMIT_HASH: 45c443b316737a4ab6e40413d7794a7f5657c19f + K_DIFFUSION_COMMIT_HASH: ab527a9a6d347f364e3d185ba6d714e22d80cb3c + BLIP_COMMIT_HASH: 48211a1594f1321b00f14c9f7a5b4813144b2fb9 + + concurrency: group: sd-examples-${{ github.ref }} cancel-in-progress: true @@ -31,6 +42,7 @@ jobs: outputs: onediff_src_url: ${{ steps.upload_to_oss.outputs.onediff_src_url }} comfy_src_url: ${{ steps.upload_to_oss.outputs.comfy_src_url }} + webui_src_url: ${{ steps.upload_to_oss.outputs.webui_src_url }} steps: - name: Setup ossutil run: | @@ -46,6 +58,57 @@ jobs: with: repository: comfyanonymous/ComfyUI path: ComfyUI + - name: Checkout Stable Diffusion WebUI + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui + path: ${{ env.WEBUI_SRC_DIR }} + + # -------- The following are the dependencies required by webui -------- + - name: Checkout CLIP (dependency of webui) + uses: actions/checkout@v4 + with: + repository: openai/CLIP + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP + ref: d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 + - name: Checkout open clip (dependency of webui) + uses: actions/checkout@v4 + with: + repository: mlfoundations/open_clip + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip + ref: bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b + - name: Checkout ${{ env.WEBUI_SRC_DIR }}-assets (dependency of webui) + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui-assets + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets + ref: ${{ env.ASSETS_COMMIT_HASH }} + - name: Checkout stablediffusion (dependency of webui) + uses: actions/checkout@v4 + with: + repository: Stability-AI/stablediffusion + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai + ref: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + - name: Checkout generative-models (dependency of webui) + uses: actions/checkout@v4 + with: + repository: Stability-AI/generative-models + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models + ref: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + - name: Checkout k-diffusion (dependency of webui) + uses: actions/checkout@v4 + with: + repository: crowsonkb/k-diffusion + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion + ref: ${{ env.K_DIFFUSION_COMMIT_HASH }} + - name: Checkout BLIP (dependency of webui) + uses: actions/checkout@v4 + with: + repository: salesforce/BLIP + path: ${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP + ref: ${{ 
env.BLIP_COMMIT_HASH }} + # -------- The above are the dependencies required by webui -------- + - name: Pack src working-directory: onediff run: | @@ -58,15 +121,24 @@ jobs: git reset --hard git clean -f git archive --prefix ${{ env.COMFYUI_SRC_DIR }}/ --format zip HEAD > comfyui-src.zip + - name: Pack webui + working-directory: ${{ env.WEBUI_SRC_DIR }} + run: | + git reset --hard + git clean -f + zip -r webui-src.zip . - name: Upload src id: upload_to_oss run: | ONEDIFF_DST="oss://gh-src-cache/onediff/${{ github.sha }}/onediff-src.zip" COMFY_DST="oss://gh-src-cache/onediff/${{ github.sha }}/comfyui-src.zip" + WEBUI_DST="oss://gh-src-cache/onediff/${{ github.sha }}/webui-src.zip" ./ossutil64 cp --disable-ignore-error --update onediff/onediff-src.zip ${ONEDIFF_DST} ./ossutil64 cp --disable-ignore-error --update ComfyUI/comfyui-src.zip ${COMFY_DST} + ./ossutil64 cp --disable-ignore-error --update ${{ env.WEBUI_SRC_DIR }}/webui-src.zip ${WEBUI_DST} echo "onediff_src_url=${ONEDIFF_DST}" >> $GITHUB_OUTPUT echo "comfy_src_url=${COMFY_DST}" >> $GITHUB_OUTPUT + echo "webui_src_url=${WEBUI_DST}" >> $GITHUB_OUTPUT run-examples: name: " ${{ matrix.test-suite }} ${{ matrix.image }}" runs-on: [self-hosted, cuda] @@ -81,6 +153,7 @@ jobs: test-suite: - diffusers_examples - comfy + - webui steps: - name: Login to ACR with the AccessKey pair uses: aliyun/acr-login@v1 @@ -110,6 +183,11 @@ jobs: run: | $HOME/ossutil64 cp ${{ needs.upload_src.outputs.comfy_src_url }} . unzip -o $(basename ${{ needs.upload_src.outputs.comfy_src_url }}) + - name: Checkout WebUI vis OSS + if: matrix.test-suite == 'webui' && github.repository == 'siliconflow/onediff' + run: | + $HOME/ossutil64 cp ${{ needs.upload_src.outputs.webui_src_url }} . + unzip -o $(basename ${{ needs.upload_src.outputs.webui_src_url }}) -d ${{env.WEBUI_SRC_DIR}} - name: Checkout if: github.repository != 'siliconflow/onediff' uses: actions/checkout@v4 @@ -119,6 +197,12 @@ jobs: with: repository: comfyanonymous/ComfyUI path: ${{ env.COMFYUI_SRC_DIR }} + - name: Checkout Stable Diffusion WebUI + if: matrix.test-suite == 'webui' && github.repository != 'siliconflow/onediff' + uses: actions/checkout@v4 + with: + repository: AUTOMATIC1111/stable-diffusion-webui + path: ${{ env.WEBUI_SRC_DIR }} - name: Clean docker containers run: | docker rm -f ${{ env.CONTAINER_NAME }} || true @@ -158,6 +242,25 @@ jobs: SDXL_BASE: ${{ env.SDXL_BASE }} UNET_INT8: ${{ env.UNET_INT8 }} SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + + - name: Setup docker for WebUI Test + if: matrix.test-suite == 'webui' + run: | + env + docker compose -f tests/webui-docker-compose.yml up -d + env: + CONTAINER_NAME: ${{ env.CONTAINER_NAME }} + MATRIX_IMAGE: ${{ matrix.image }} + WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }} + WEBUI_DEPENDENCIES_SUBDIR: ${{ env.WEBUI_DEPENDENCIES_SUBDIR }} + SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }} + SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }} + SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }} + STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }} + BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }} - run: nvidia-smi - run: nvidia-smi -L @@ -197,6 +300,7 @@ jobs: run_comfy_test "workflows/sdxl-unet-speedup-graph-saver.json" 200 run_comfy_test "workflows/sdxl-control-lora-speedup.json" 200 + 
run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/ipadapter_advanced.json" 200 run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache.json" 600 run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache-with-lora.json" 800 # run_comfy_test "workflows/text-to-video-speedup.json" 5000 @@ -234,7 +338,9 @@ jobs: - if: matrix.test-suite == 'diffusers_examples' run: docker exec -w /src/onediff/onediff_diffusers_extensions ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_turbo.py --compile true --base /share_nfs/hf_models/sdxl-turbo - if: matrix.test-suite == 'diffusers_examples' - run: docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py + run: | + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install scikit-image -i https://pypi.tuna.tsinghua.edu.cn/simple + docker exec -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 -m pytest -v onediff_diffusers_extensions/tests/test_lora.py # - if: matrix.test-suite == 'diffusers_examples' # run: docker exec -w /src/onediff/onediff_diffusers_extensions -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=0 ${{ env.CONTAINER_NAME }} python3 examples/text_to_image_sdxl_reuse_pipe.py --base /share_nfs/hf_models/stable-diffusion-xl-base-1.0 --new_base /share_nfs/hf_models/dataautogpt3-OpenDalleV1.1 - if: matrix.test-suite == 'diffusers_examples' && startsWith(matrix.image, 'onediff-pro') @@ -242,6 +348,67 @@ jobs: docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py --model /share_nfs/hf_models/stable-diffusion-v1-5-int8 --width 512 --height 512 --saved_image /src/onediff/output_enterprise_sd.png docker exec -w /src/onediff ${{ env.CONTAINER_NAME }} python3 tests/test_quantitative_quality.py + - name: Install Requirements for WebUI + if: matrix.test-suite == 'webui' + run: | + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip config set global.extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${{ env.CONTAINER_NAME }} python3 -m pip install pytorch-lightning gradio==3.41.2 diskcache gitpython pytorch_lightning==1.9.4 scikit-image jsonmerge pillow-avif-plugin torchdiffeq torchsde clean-fid resize-right lark tomesd blendmodes facexlib opencv-python==4.8.0.74 piexif inflection ftfy regex tqdm pydantic==1.10.13 + + - name: Prepare environment for WebUI + if: matrix.test-suite == 'webui' + run: | + # hack code to print error msg for debugging + # docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sed -i '/except RuntimeError:/c\ except RuntimeError as e:\n print(f"Error occurred while running git command: {e}")' modules/launch_utils.py + docker exec -d ${{ env.CONTAINER_NAME }} mkdir /app/${{ env.WEBUI_SRC_DIR }}/.git /app/${{ env.WEBUI_SRC_DIR }}/openai + docker exec -d ${{ env.CONTAINER_NAME }} ln -s /share_nfs/onediff_ci/sd-webui/models/clips/clip-vit-large-patch14 /app/${{ env.WEBUI_SRC_DIR }}/openai/clip-vit-large-patch14 + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory /app/${{ env.WEBUI_SRC_DIR }} + for dir in $SAFE_DIRECTORIES; do + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} git config --global --add safe.directory 
/app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir + echo /app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/$dir + done + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -e venv_dir=- ${{ env.CONTAINER_NAME }} sh -c "bash webui.sh -f --exit --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models" + + # env: + # INDEX_URL: "https://pypi.tuna.tsinghua.edu.cn/simple" + # CLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/CLIP" + # OPENCLIP_PACKAGE: "git+file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/open_clip" + # ASSETS_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-webui-assets" + # STABLE_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/stable-diffusion-stability-ai" + # STABLE_DIFFUSION_XL_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/generative-models" + # K_DIFFUSION_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/k-diffusion" + # BLIP_REPO: "file:///app/${{ env.WEBUI_SRC_DIR }}/${{ env.WEBUI_DEPENDENCIES_SUBDIR }}/BLIP" + + # ASSETS_COMMIT_HASH: ${{ env.ASSETS_COMMIT_HASH }} + # STABLE_DIFFUSION_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_COMMIT_HASH }} + # STABLE_DIFFUSION_XL_COMMIT_HASH: ${{ env.STABLE_DIFFUSION_XL_COMMIT_HASH }} + # K_DIFFUSION_COMMIT_HASH: ${{ env.K_DIFFUSION_COMMIT_HASH }} + # BLIP_COMMIT_HASH: ${{ env.BLIP_COMMIT_HASH }} + + # SAFE_DIRECTORIES: | + # CLIP + # open_clip + # stable-diffusion-webui-assets + # stable-diffusion-stability-ai + # generative-models + # k-diffusion + # BLIP + + - name: Start WebUI Web Service + if: matrix.test-suite == 'webui' + run: | + docker exec -w /app/${{ env.WEBUI_SRC_DIR }} -d ${{ env.CONTAINER_NAME }} sh -c "python3 webui.py --port 7860 --api --no-download-sd-model --do-not-download-clip --disable-safe-unpickle --ckpt-dir /share_nfs/onediff_ci/sd-webui/models --skip-version-check > /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log 2>&1" + sleep 60 + + - run: docker exec ${{ env.CONTAINER_NAME }} ps aux + + - if: matrix.test-suite == 'webui' + run: docker exec ${{ env.CONTAINER_NAME }} python3 -m pytest -v -s tests/sd-webui/test_api.py + + - name: Show WebUI Log + if: matrix.test-suite == 'webui' + run: docker exec ${{ env.CONTAINER_NAME }} cat /app/${{ env.WEBUI_SRC_DIR }}/onediff_webui.log + - name: Shutdown docker for ComfyUI Test if: matrix.test-suite == 'comfy' run: | @@ -268,3 +435,16 @@ jobs: SDXL_BASE: ${{ env.SDXL_BASE }} UNET_INT8: ${{ env.UNET_INT8 }} SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} + + - name: Shutdown docker for WebUI Test + if: matrix.test-suite == 'webui' + run: | + docker compose -f tests/webui-docker-compose.yml down + env: + CONTAINER_NAME: ${{ env.CONTAINER_NAME }} + ACR_ORG: ${{ env.ACR_ORG }} + MATRIX_IMAGE: ${{ matrix.image }} + WEBUI_SRC_DIR: ${{ env.WEBUI_SRC_DIR }} + SELENIUM_IMAGE: ${{ env.SELENIUM_IMAGE }} + SELENIUM_CONTAINER_NAME: ${{ env.SELENIUM_CONTAINER_NAME }} + SILICON_ONEDIFF_LICENSE_KEY: ${{ secrets.SILICON_ONEDIFF_LICENSE_KEY }} diff --git a/.gitignore b/.gitignore index e68a35b10..677317453 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,6 @@ unet_graphs # onediff_comfy_nodes *.pt *.graph + +# onediff_sd_webui_extensions +onediff_sd_webui_extensions/compiled_caches/ diff --git a/benchmarks/image_to_video.py 
b/benchmarks/image_to_video.py index 730ec752b..fcc9e19cd 100644 --- a/benchmarks/image_to_video.py +++ b/benchmarks/image_to_video.py @@ -41,7 +41,7 @@ import oneflow as flow import torch -from onediffx import compile_pipe, compile_options +from onediffx import compile_pipe, OneflowCompileOptions from diffusers.utils import load_image, export_to_video @@ -189,7 +189,8 @@ def main(): # especially for 40xx series cards. # So here by partially disabling the half accumulation in MHA partially, # we can get a good balance. - compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = ( + compile_options = OneflowCompileOptions() + compile_options.attention_allow_half_precision_score_accumulation_max_m = ( args.attention_fp16_score_accum_max_m ) pipe = compile_pipe(pipe, options=compile_options) diff --git a/benchmarks/patch_stable_cascade_of.py b/benchmarks/patch_stable_cascade_of.py index 8f388111a..454a17344 100644 --- a/benchmarks/patch_stable_cascade_of.py +++ b/benchmarks/patch_stable_cascade_of.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_of = transform_mgr.transform_package("diffusers") StableCascadeUnet_OF_CLS = ( @@ -120,7 +120,7 @@ def forward( ) # torch2oflow_class_map.update({StableCascadeUnet: StableCascadeUnetOflow}) -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from contextlib import contextmanager diff --git a/benchmarks/run_text_to_image_benchmark.sh b/benchmarks/run_text_to_image_benchmark.sh index 273a419b9..512aab42f 100755 --- a/benchmarks/run_text_to_image_benchmark.sh +++ b/benchmarks/run_text_to_image_benchmark.sh @@ -111,6 +111,22 @@ benchmark_sd_model sd15 ${SD15_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 benchmark_sd_model sd21 ${SD21_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 benchmark_sd_model sdxl ${SDXL_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 +benchmark_sd_model_with_throughput() { + model_path=$1 + warmups=$2 + compiler=$3 + echo "Run ${model_path} with throughput test at 1024x1024..." 
+ script_output=$(python3 ${SCRIPT_DIR}/text_to_image.py --model ${model_path} --variant fp16 --warmups ${warmups} --compiler ${compiler} --height 1024 --width 1024 --throughput | tee /dev/tty) + + throughput=$(echo "${script_output}" | grep -oP '(?<=Throughput without base cost: )\d+\.\d+') + inference_time_eq=$(echo "${script_output}" | grep -oP 'Model: Inference Time = .+') + + BENCHMARK_RESULT_TEXT="${BENCHMARK_RESULT_TEXT}| ${model_path} | 1024x1024 | N/A | N/A | N/A | N/A | Throughput without base cost: ${throughput} | ${inference_time_eq} |\n" +} + +benchmark_sd_model_with_throughput ${SD15_MODEL_PATH} ${WARMUPS} ${COMPILER} +benchmark_sd_model_with_throughput ${SDXL_MODEL_PATH} ${WARMUPS} ${COMPILER} + if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ x"${COMPILER}" == x"oneflow" ]; then benchmark_sd_model sdxl_quant ${SDXL_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 fi @@ -119,4 +135,5 @@ if [ ${BENCHMARK_QUANT_MODEL} != 0 ] && [ ${BENCHMARK_DEEP_CACHE_MODEL} != 0 ] & benchmark_sd_model sdxl_deepcache_quant ${SDXL_DEEP_CACHE_QUANT_MODEL_PATH} 1024x1024,720x1280,768x768,512x512 fi +echo -e "\nBenchmark Results:" echo -e "${BENCHMARK_RESULT_TEXT}" > ${OUTPUT_FILE} diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py index a8c97c510..539e73f74 100644 --- a/benchmarks/text_to_image.py +++ b/benchmarks/text_to_image.py @@ -6,9 +6,9 @@ CONTROLNET = None STEPS = 30 PROMPT = "best quality, realistic, unreal engine, 4K, a beautiful girl" -NEGATIVE_PROMPT = None -SEED = None -WARMUPS = 3 +NEGATIVE_PROMPT = "" +SEED = 333 +WARMUPS = 1 BATCH = 1 HEIGHT = None WIDTH = None @@ -19,6 +19,8 @@ CACHE_INTERVAL = 3 CACHE_LAYER_ID = 0 CACHE_BLOCK_ID = 0 +COMPILER = "oneflow" +COMPILER_CONFIG = None import os import importlib @@ -27,6 +29,8 @@ import time import json import torch +import matplotlib.pyplot as plt +import numpy as np from PIL import Image, ImageDraw from diffusers.utils import load_image @@ -56,20 +60,30 @@ def parse_args(): parser.add_argument("--input-image", type=str, default=INPUT_IMAGE) parser.add_argument("--control-image", type=str, default=CONTROL_IMAGE) parser.add_argument("--output-image", type=str, default=OUTPUT_IMAGE) + parser.add_argument("--throughput", action="store_true") parser.add_argument("--deepcache", action="store_true") parser.add_argument( "--compiler", type=str, - default="oneflow", + default=COMPILER, choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"], ) + parser.add_argument( + "--compiler-config", + type=str, + default=COMPILER_CONFIG, + ) return parser.parse_args() +args = parse_args() + def load_pipe( pipeline_cls, model_name, variant=None, + dtype=torch.float16, + device="cuda", custom_pipeline=None, scheduler=None, lora=None, @@ -80,31 +94,34 @@ def load_pipe( extra_kwargs["custom_pipeline"] = custom_pipeline if variant is not None: extra_kwargs["variant"] = variant + if dtype is not None: + extra_kwargs["torch_dtype"] = dtype if controlnet is not None: from diffusers import ControlNetModel controlnet = ControlNetModel.from_pretrained( - controlnet, torch_dtype=torch.float16, + controlnet, torch_dtype=dtype, ) extra_kwargs["controlnet"] = controlnet if os.path.exists(os.path.join(model_name, "calibrate_info.txt")): from onediff.quantization import QuantPipeline pipe = QuantPipeline.from_quantized( - pipeline_cls, model_name, torch_dtype=torch.float16, **extra_kwargs + pipeline_cls, model_name, **extra_kwargs ) else: pipe = pipeline_cls.from_pretrained( - model_name, torch_dtype=torch.float16, **extra_kwargs + 
model_name, **extra_kwargs ) - if scheduler is not None: + if scheduler is not None and scheduler != "none": scheduler_cls = getattr(importlib.import_module("diffusers"), scheduler) pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config) if lora is not None: pipe.load_lora_weights(lora) pipe.fuse_lora() pipe.safety_checker = None - pipe.to(torch.device("cuda")) + if device is not None: + pipe.to(torch.device(device)) return pipe @@ -134,8 +151,52 @@ def callback_on_step_end(self, pipe, i, t, callback_kwargs={}): return callback_kwargs +def calculate_inference_time_and_throughput(height, width, n_steps, model): + start_time = time.time() + model(prompt=args.prompt, height=height, width=width, num_inference_steps=n_steps) + end_time = time.time() + inference_time = end_time - start_time + # pixels_processed = height * width * n_steps + # throughput = pixels_processed / inference_time + throughput = n_steps / inference_time + return inference_time, throughput + + +def generate_data_and_fit_model(model, steps_range): + height, width = 1024, 1024 + data = {"steps": [], "inference_time": [], "throughput": []} + + for n_steps in steps_range: + inference_time, throughput = calculate_inference_time_and_throughput(height, width, n_steps, model) + data["steps"].append(n_steps) + data["inference_time"].append(inference_time) + data["throughput"].append(throughput) + print(f"Steps: {n_steps}, Inference Time: {inference_time:.2f} seconds, Throughput: {throughput:.2f} steps/s") + + average_throughput = np.mean(data["throughput"]) + print(f"Average Throughput: {average_throughput:.2f} steps/s") + + coefficients = np.polyfit(data["steps"], data["inference_time"], 1) + base_time_without_base_cost = 1 / coefficients[0] + print(f"Throughput without base cost: {base_time_without_base_cost:.2f} steps/s") + return data, coefficients + + +def plot_data_and_model(data, coefficients): + plt.figure(figsize=(10, 5)) + plt.scatter(data["steps"], data["inference_time"], color='blue') + plt.plot(data["steps"], np.polyval(coefficients, data["steps"]), color='red') + plt.title("Inference Time vs. 
Steps") + plt.xlabel("Steps") + plt.ylabel("Inference Time (seconds)") + plt.grid(True) + # plt.savefig("output.png") + plt.show() + + print(f"Model: Inference Time = {coefficients[0]:.2f} * Steps + {coefficients[1]:.2f}") + + def main(): - args = parse_args() if args.input_image is None: if args.deepcache: from onediffx.deep_cache import StableDiffusionXLPipeline as pipeline_cls @@ -154,18 +215,32 @@ def main(): controlnet=args.controlnet, ) - height = args.height or pipe.unet.config.sample_size * pipe.vae_scale_factor - width = args.width or pipe.unet.config.sample_size * pipe.vae_scale_factor + core_net = None + if core_net is None: + core_net = getattr(pipe, "unet", None) + if core_net is None: + core_net = getattr(pipe, "transformer", None) + height = args.height or core_net.config.sample_size * pipe.vae_scale_factor + width = args.width or core_net.config.sample_size * pipe.vae_scale_factor if args.compiler == "none": pass elif args.compiler == "oneflow": pipe = compile_pipe(pipe) elif args.compiler == "nexfort": - pipe = compile_pipe(pipe, backend="nexfort") + if args.compiler_config is not None: + # config with dict + options = json.loads(args.compiler_config) + else: + # config with string + options = '{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}' + pipe = compile_pipe(pipe, backend="nexfort", options=options, fuse_qkv_projections=True) elif args.compiler in ("compile", "compile-max-autotune"): mode = "max-autotune" if args.compiler == "compile-max-autotune" else None - pipe.unet = torch.compile(pipe.unet, mode=mode) + if hasattr(pipe, "unet"): + pipe.unet = torch.compile(pipe.unet, mode=mode) + if hasattr(pipe, "transformer"): + pipe.transformer = torch.compile(pipe.transformer, mode=mode) if hasattr(pipe, "controlnet"): pipe.controlnet = torch.compile(pipe.controlnet, mode=mode) pipe.vae = torch.compile(pipe.vae, mode=mode) @@ -199,7 +274,6 @@ def get_kwarg_inputs(): negative_prompt=args.negative_prompt, height=height, width=width, - num_inference_steps=args.steps, num_images_per_prompt=args.batch, generator=None if args.seed is None @@ -210,6 +284,8 @@ def get_kwarg_inputs(): else json.loads(args.extra_call_kwargs) ), ) + if args.steps is not None: + kwarg_inputs["num_inference_steps"] = args.steps if input_image is not None: kwarg_inputs["image"] = input_image if control_image is not None: @@ -227,10 +303,15 @@ def get_kwarg_inputs(): # The initial calls will trigger compilation and might be very slow. # After that, it should be very fast. if args.warmups > 0: + begin = time.time() + print("=======================================") print("Begin warmup") for _ in range(args.warmups): pipe(**get_kwarg_inputs()) + end = time.time() print("End warmup") + print(f"Warmup time: {end - begin:.3f}s") + print("=======================================") # Let"s see it! # Note: Progress bar might work incorrectly due to the async nature of CUDA. 
@@ -255,7 +336,7 @@ def get_kwarg_inputs(): cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024 else: cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3) - print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB") + print(f"Max used CUDA memory : {cuda_mem_after_used:.3f}GiB") print("=======================================") if args.output_image is not None: @@ -263,6 +344,11 @@ def get_kwarg_inputs(): else: print("Please set `--output-image` to save the output image") + if args.throughput: + steps_range = range(1, 100, 1) + data, coefficients = generate_data_and_fit_model(pipe, steps_range) + plot_data_and_model(data, coefficients) + if __name__ == "__main__": main() diff --git a/onediff_comfy_nodes/extras_nodes/nodes_compare.py b/onediff_comfy_nodes/extras_nodes/nodes_compare.py index 4f4461d9b..a06fa9edb 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_compare.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_compare.py @@ -5,7 +5,7 @@ import folder_paths import numpy as np import oneflow as flow -from onediff.infer_compiler.transform.builtin_transform import torch2oflow +from onediff.infer_compiler.backends.oneflow.transform.builtin_transform import torch2oflow from PIL import Image try: @@ -148,8 +148,9 @@ def save_images( ) results = list() for image1, image2 in zip(images1, images2): + # image diff - image = image1 - image2 + image = image1.cuda() - image2.cuda() i = 255.0 * image.cpu().numpy() img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8)) diff --git a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py index ca22873ee..9daa567e6 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py @@ -7,7 +7,7 @@ from comfy import model_management from comfy.cli_args import args -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from ..modules.oneflow.config import ONEDIFF_QUANTIZED_OPTIMIZED_MODELS from ..modules.oneflow.hijack_animatediff import animatediff_hijacker @@ -17,6 +17,7 @@ from ..modules.oneflow.hijack_samplers import samplers_hijack from ..modules.oneflow.hijack_comfyui_instantid import comfyui_instantid_hijacker from ..modules.oneflow.hijack_model_patcher import model_patch_hijacker +from ..modules.oneflow.hijack_utils import comfy_utils_hijack from ..modules.oneflow import BasicOneFlowBoosterExecutor from ..modules.oneflow import DeepcacheBoosterExecutor from ..modules.oneflow import PatchBoosterExecutor @@ -35,6 +36,7 @@ ipadapter_plus_hijacker.hijack() comfyui_instantid_hijacker.hijack() model_patch_hijacker.hijack() +comfy_utils_hijack.hijack() import comfy_extras.nodes_video_model from nodes import CheckpointLoaderSimple diff --git a/onediff_comfy_nodes/modules/oneflow/booster_basic.py b/onediff_comfy_nodes/modules/oneflow/booster_basic.py index 608462861..f35d4f27d 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_basic.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_basic.py @@ -7,7 +7,7 @@ from comfy.model_patcher import ModelPatcher from comfy.sd import VAE from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor from .onediff_controlnet import 
OneDiffControlLora @@ -47,6 +47,7 @@ def _(self, model: ModelPatcher, ckpt_name: Optional[str] = None, **kwargs): ) set_compiled_options(compiled_model, graph_file) + return model @execute.register(ControlNet) diff --git a/onediff_comfy_nodes/modules/oneflow/booster_patch.py b/onediff_comfy_nodes/modules/oneflow/booster_patch.py index 6bff76ba9..b12e1a042 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_patch.py @@ -2,7 +2,7 @@ from functools import singledispatchmethod from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor diff --git a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py index 7254ae0b3..f4b50d6e4 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py @@ -8,7 +8,7 @@ from comfy.controlnet import ControlNet from comfy.model_patcher import ModelPatcher from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from onediff_quant.quantization import QuantizationConfig from onediff_quant.quantization.module_operations import get_sub_module from onediff_quant.quantization.quantize_calibrators import ( diff --git a/onediff_comfy_nodes/modules/oneflow/config.py b/onediff_comfy_nodes/modules/oneflow/config.py index 353c4f024..8a6494e31 100644 --- a/onediff_comfy_nodes/modules/oneflow/config.py +++ b/onediff_comfy_nodes/modules/oneflow/config.py @@ -3,7 +3,7 @@ import sys from pathlib import Path -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version # Set up paths ONEDIFF_QUANTIZED_OPTIMIZED_MODELS = "onediff_quant" diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py index 167789792..d6340640f 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py @@ -4,8 +4,8 @@ """ import os -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py index 74f8dd9c4..3bbc579dc 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py @@ -1,7 +1,7 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/motion_module_ad.py import oneflow as torch from einops import repeat -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt @@ 
-124,7 +124,7 @@ def forward( ) # import torch as torch_pt -# from onediff.infer_compiler.transform import torch2oflow +# from onediff.infer_compiler.backends.oneflow.transform import torch2oflow # @torch2oflow.register(TemporalTransformer3DModel_PT_CLS) # def _(mod, verbose=False): diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py index 720c5ab2a..ea201069b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py @@ -2,7 +2,7 @@ import oneflow as flow from einops import rearrange from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from oneflow.nn.functional import group_norm from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py index d1b4f3885..1fafec133 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py @@ -1,6 +1,6 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/utils_motion.py import oneflow as torch -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py index ec2a1903e..d18438434 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py index 37d11f083..b9da6376b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py index 9be19105d..588fe7971 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py @@ -1,11 +1,11 @@ 
+import torch from register_comfy.CrossAttentionPatch import Attn2Replace, ipadapter_attention -from onediff.infer_compiler.transform import torch2oflow +from comfy import model_management +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from ..utils.booster_utils import clear_deployable_module_cache_and_unbind from ..patch_management import PatchType, create_patch_executor -# from onediff.infer_compiler.utils.cost_util import cost_time -# @cost_time(debug=True, message="set_model_patch_replace_v2") def set_model_patch_replace_v2(org_fn, model, patch_kwargs, key): diff_model = model.model.diffusion_model cache_patch_executor = create_patch_executor(PatchType.CachedCrossAttentionPatch) @@ -41,6 +41,12 @@ def split_patch_kwargs(patch_kwargs): else: split2dict[k] = v + # patch for weight + weight = split1dict["weight"] + if isinstance(weight, (int, float)): + weight = torch.tensor([weight]) + split1dict["weight"] = weight.to(model_management.get_torch_device()) + return split1dict, split2dict new_patch_kwargs, patch_kwargs = split_patch_kwargs(patch_kwargs) diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_utils.py b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py new file mode 100644 index 000000000..4a4f25c5a --- /dev/null +++ b/onediff_comfy_nodes/modules/oneflow/hijack_utils.py @@ -0,0 +1,28 @@ +"""hijack ComfyUI/comfy/utils.py""" +import torch +from comfy.utils import copy_to_param +from ..sd_hijack_utils import Hijacker + + +def copy_to_param_of(org_fn, obj, attr, value): + # inplace update tensor instead of replacing it + attrs = attr.split(".") + for name in attrs[:-1]: + obj = getattr(obj, name) + prev = getattr(obj, attrs[-1]) + + if prev.data.dtype == torch.int8 and prev.data.dtype != value.dtype: + return + + prev.data.copy_(value) + + +def cond_func(orig_func, *args, **kwargs): + return True + + +comfy_utils_hijack = Hijacker() + +comfy_utils_hijack.register( + orig_func=copy_to_param, sub_func=copy_to_param_of, cond_func=cond_func +) diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py index e1c91b7ba..32b668121 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py @@ -7,8 +7,8 @@ from nodes import * # must imported before import comfy -from onediff.infer_compiler.transform import register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.transform import register +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from .attention import CrossAttention as CrossAttention1f from .attention import SpatialTransformer as SpatialTransformer1f diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py index 3eb09d9fb..27bf9165a 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py @@ -8,7 +8,7 @@ import oneflow as torch import oneflow.nn as nn from einops import rearrange, repeat -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import 
proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py index 854eb9f85..e320170c0 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py @@ -4,7 +4,7 @@ import oneflow as torch import oneflow.nn as nn import oneflow.nn.functional as F -from onediff.infer_compiler.transform import proxy_class +from onediff.infer_compiler.backends.oneflow.transform import proxy_class def Normalize(in_channels, num_groups=32): diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py index cf6e54553..638b4b3cd 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py @@ -1,5 +1,5 @@ import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_comfy = transform_mgr.transform_package("comfy") proxy_ops = transformed_comfy.ops diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py index 88cc98469..b8469004b 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py @@ -4,7 +4,7 @@ import oneflow as th # 'th' is the way ComfyUI name the torch import oneflow.nn.functional as F from einops import rearrange -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py index 86e822739..14f1a26d8 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py @@ -1,7 +1,7 @@ # ComfyUI/comfy/ldm/modules/diffusionmodules/model.py import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py index d05e8acb5..48f0de2a6 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py @@ -1,6 +1,6 @@ import onediff_quant import oneflow as flow -from onediff.infer_compiler.transform 
import register +from onediff.infer_compiler.backends.oneflow.transform import register torch2oflow_class_map = { onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule, diff --git a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py index 5b8143605..80a242f2d 100644 --- a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py @@ -1,6 +1,6 @@ from register_comfy.CrossAttentionPatch import is_crossAttention_patch -from onediff.infer_compiler.utils import online_quantization_utils +from onediff.infer_compiler.backends.oneflow import online_quantization_utils from .patch_executor import PatchExecutorBase diff --git a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py index ee42cc171..a70246405 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py @@ -5,8 +5,8 @@ from comfy.model_base import BaseModel, SVD_img2vid from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule -from onediff.infer_compiler.utils import set_boolean_env_var +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule +from onediff.utils import set_boolean_env_var from ..patch_management import PatchType, create_patch_executor diff --git a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py index 96844fb2e..34acfe3b0 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py @@ -4,8 +4,8 @@ from comfy import model_management from folder_paths import get_input_directory # onediff -from onediff.infer_compiler import CompileOptions, oneflow_compile -from onediff.infer_compiler.transform import torch2oflow +from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from onediff.optimization.quant_optimizer import quantize_model # onediff_comfy_nodes @@ -18,9 +18,9 @@ def compoile_unet(diffusion_model, graph_file): print(f" OneDiffCheckpointLoaderSimple load_checkpoint file_path {graph_file}") - compile_options = CompileOptions() - compile_options.oneflow.graph_file = graph_file - compile_options.oneflow.graph_file_device = load_device + compile_options = OneflowCompileOptions() + compile_options.graph_file = graph_file + compile_options.graph_file_device = load_device diffusion_model = oneflow_compile(diffusion_model, options=compile_options) return diffusion_model diff --git a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py index be22c7e64..6441673d6 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py @@ -33,7 +33,7 @@ def __init__( graph_device=None, ): from onediff.infer_compiler import ( - CompileOptions, + OneflowCompileOptions, oneflow_compile, DeployableModule, ) @@ -49,10 +49,10 @@ def __init__( "diffusion_model" ] = self.model.diffusion_model else: - options = CompileOptions() - options.oneflow.use_graph = use_graph - 
options.oneflow.graph_file = graph_path - options.oneflow.graph_file_device = graph_device + options = OneflowCompileOptions() + options.use_graph = use_graph + options.graph_file = graph_path + options.graph_file_device = graph_device self.model.__dict__["_modules"]["diffusion_model"] = oneflow_compile( self.model.diffusion_model, options=options ) @@ -506,7 +506,7 @@ def __init__( gen_compile_options=None, ): from onediff.infer_compiler import ( - CompileOptions, + OneflowCompileOptions, oneflow_compile, DeployableModule, ) @@ -525,14 +525,14 @@ def __init__( self.model.diffusion_model, cache_layer_id, cache_block_id ) if use_graph: - gen_compile_options = gen_compile_options or (lambda x: CompileOptions()) + gen_compile_options = gen_compile_options or (lambda x: OneflowCompileOptions()) compile_options = gen_compile_options(self.deep_cache_unet) - compile_options.oneflow.use_graph = use_graph + compile_options.use_graph = use_graph self.deep_cache_unet = oneflow_compile( self.deep_cache_unet, options=compile_options, ) compile_options = gen_compile_options(self.fast_deep_cache_unet) - compile_options.oneflow.use_graph = use_graph + compile_options.use_graph = use_graph self.fast_deep_cache_unet = oneflow_compile( self.fast_deep_cache_unet, options=compile_options, ) diff --git a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py index c56ef3244..2c702e383 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/onediff_load_utils.py @@ -3,7 +3,7 @@ import folder_paths import torch from comfy import model_management -from onediff.infer_compiler import CompileOptions, oneflow_compile +from onediff.infer_compiler import OneflowCompileOptions, oneflow_compile from ..config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS from .graph_path import generate_graph_path @@ -49,11 +49,11 @@ def onediff_load_quant_checkpoint_advanced( ) if vae_speedup == "enable": - compile_options = CompileOptions() - compile_options.oneflow.graph_file = generate_graph_path( + compile_options = OneflowCompileOptions() + compile_options.graph_file = generate_graph_path( ckpt_name, vae.first_stage_model ) - compile_options.oneflow.graph_file_device = model_management.get_torch_device() + compile_options.graph_file_device = model_management.get_torch_device() vae.first_stage_model = oneflow_compile( vae.first_stage_model, options=compile_options ) diff --git a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py index 048a0312d..a14b15603 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py @@ -10,8 +10,7 @@ from nodes import KSampler, VAEDecode from onediff.infer_compiler import oneflow_compile # onediff -from onediff.infer_compiler.utils.module_operations import (get_sub_module, - modify_sub_module) +from onediff.torch_utils.module_operations import (get_sub_module, modify_sub_module) from onediff_quant import Quantizer # onediff_quant from onediff_quant.utils import (find_quantizable_modules, get_quantize_module, diff --git a/onediff_diffusers_extensions/README.md b/onediff_diffusers_extensions/README.md index 7ef564f6c..1704aeb63 100644 --- a/onediff_diffusers_extensions/README.md +++ b/onediff_diffusers_extensions/README.md @@ -208,7 +208,7 @@ pipe = 
StableVideoDiffusionPipeline.from_pretrained( ) pipe.to("cuda") -compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0 +compile_options.attention_allow_half_precision_score_accumulation_max_m = 0 pipe = compile_pipe(pipe, options=compile_options) input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true") diff --git a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py index cbd7dc81f..ebdc5de2f 100644 --- a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py +++ b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py @@ -18,7 +18,7 @@ from diffusers import EulerDiscreteScheduler from diffusers import utils -from onediff.infer_compiler.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt _MODEL_ID = "stabilityai/stable-diffusion-2" diff --git a/onediff_diffusers_extensions/examples/pixart_alpha/README.md b/onediff_diffusers_extensions/examples/pixart_alpha/README.md new file mode 100644 index 000000000..a63891526 --- /dev/null +++ b/onediff_diffusers_extensions/examples/pixart_alpha/README.md @@ -0,0 +1,53 @@ +# Run PixArt alpha (with nexfort backend) +## Environment setup +### Set up onediff +https://github.com/siliconflow/onediff?tab=readme-ov-file#installation + +### Set up nexfort backend +https://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort + +### Set up PixArt alpha +HF model: https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS + +HF pipeline: https://huggingface.co/docs/diffusers/main/en/api/pipelines/pixart + +## Run +model_id_or_path_to_PixArt-XL-2-1024-MS is the model id or model path of pixart alpha, such as `/data/hf_models/PixArt-XL-2-1024-MS/` + +### Go to the onediff folder +``` +cd onediff +``` + +### Run 1024*1024 without compile(the original pytorch HF diffusers pipeline) +``` +python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler none --output-image ./pixart_alpha.png +``` + +### Run 1024*1024 with compile +``` +python3 ./benchmarks/text_to_image.py --model /data/hf_models/PixArt-XL-2-1024-MS/ --scheduler none --steps 20 --compiler nexfort --output-image ./pixart_alpha.png +``` + +## Performance comparation +### nexfort compile config +- compiler-config default is `{"mode": "max-optimize:max-autotune:freezing:benchmark:cudagraphs", "memory_format": "channels_last"}` in `/benchmarks/text_to_image.py` + - setting `--compiler-config '{"mode": "max-autotune", "memory_format": "channels_last"}'` will reduce compilation time to 57.863s and just slightly reduce the performance +- fuse_qkv_projections: True + +### Metric +| Metric | NVIDIA A100-PCIE-40GB (1024 * 1024) | +| ------------------------------------------------ | ----------------------------------- | +| Data update date(yyyy-mm-dd) | 2024-05-23 | +| PyTorch iteration speed | 8.623it/s | +| OneDiff iteration speed | 10.743it/s(+24.58%) | +| PyTorch E2E time | 2.568s | +| OneDiff E2E time | 1.992s(-22.4%) | +| PyTorch Max Mem Used | 14.445GiB | +| OneDiff Max Mem Used | 13.855GiB | +| PyTorch Warmup with Run time | 4.100s | +| OneDiff Warmup with Compilation time1 | 115.309s | +| OneDiff Warmup with Cache time | TODO | + + 1 OneDiff Warmup with Compilation time is tested on Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz. 
Note this is just for reference, and it varies a lot on different CPU. + diff --git a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py index a6f323998..2081fbaa6 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py @@ -6,7 +6,7 @@ import torch.nn as nn # oneflow_compile should be imported before importing any diffusers -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions def parse_args(): @@ -110,7 +110,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py index e5b150052..e42b47071 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py @@ -2,7 +2,7 @@ import time import argparse -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions import torch import torch.nn as nn @@ -92,7 +92,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py index 859b2e491..5a164d239 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py @@ -6,7 +6,7 @@ import torch.nn as nn # oneflow_compile should be imported before importing any diffusers -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions def parse_args(): @@ -90,7 +90,8 @@ def parse_args(): pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits, ) -compile_options.oneflow.use_graph = args.graph +compile_options = OneflowCompileOptions() +compile_options.use_graph = args.graph if args.compile_text_encoder: if pipe.text_encoder is not None: diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py index aa5d86058..06d16c81f 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py @@ -2,7 +2,7 @@ from pathlib import Path from diffusers import DiffusionPipeline from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign try: from onediffx.lora import load_and_fuse_lora, unfuse_lora, update_graph_with_constant_folding_info diff --git 
a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py index ae9488221..0da27858f 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_save_load.py @@ -7,7 +7,7 @@ import torch import oneflow as flow -from onediff.infer_compiler import oneflow_compile, compile_options +from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions from diffusers import DiffusionPipeline parser = argparse.ArgumentParser() @@ -47,7 +47,8 @@ # Compile unet and vae print("unet and vae is compiled to oneflow.") -compile_options.oneflow.max_cached_graph_size = cmd_args.num_dynamic_input_size +compile_options = OneflowCompileOptions() +compile_options.max_cached_graph_size = cmd_args.num_dynamic_input_size base.unet = oneflow_compile(base.unet, options=compile_options) base.vae.decoder = oneflow_compile(base.vae.decoder, options=compile_options) diff --git a/onediff_diffusers_extensions/onediffx/__init__.py b/onediff_diffusers_extensions/onediffx/__init__.py index 2da48e8f8..532dad12c 100644 --- a/onediff_diffusers_extensions/onediffx/__init__.py +++ b/onediff_diffusers_extensions/onediffx/__init__.py @@ -1,5 +1,5 @@ __version__ = "1.1.0.dev1" -from onediff.infer_compiler import compile_options from .compilers.diffusion_pipeline_compiler import compile_pipe, save_pipe, load_pipe +from onediff.infer_compiler import OneflowCompileOptions -__all__ = ["compile_pipe", "compile_options", "save_pipe", "load_pipe"] +__all__ = ["compile_pipe", "save_pipe", "load_pipe", "OneflowCompileOptions"] diff --git a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py index 3307991e3..dbd784367 100644 --- a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py +++ b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py @@ -1,7 +1,7 @@ import os import torch from onediff.infer_compiler import compile, DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger def _recursive_getattr(obj, attr, default=None): @@ -29,11 +29,11 @@ def _recursive_setattr(obj, attr, value): "fast_unet", # for deepcache "prior", # for StableCascadePriorPipeline "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline "vqgan.down_blocks", # for StableCascadeDecoderPipeline "vqgan.up_blocks", # for StableCascadeDecoderPipeline "vae.decoder", "vae.encoder", - "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline ] @@ -52,8 +52,20 @@ def _filter_parts(ignores=()): def compile_pipe( - pipe, *, backend="oneflow", options=None, ignores=(), + pipe, *, backend="oneflow", options=None, ignores=(), fuse_qkv_projections=False, ): + if fuse_qkv_projections: + pipe = fuse_qkv_projections_in_pipe(pipe) + + if backend == "nexfort" and isinstance(options, str): + import json + options = json.loads(options) + + if backend == "nexfort" and options is not None and "memory_format" in options: + memory_format = getattr(torch, options["memory_format"]) + pipe = convert_pipe_to_memory_format(pipe, ignores=ignores, memory_format=memory_format) + del options["memory_format"] + # To fix the bug of graph load of vae. 
Please refer to: https://github.com/siliconflow/onediff/issues/452 if ( hasattr(pipe, "upcast_vae") @@ -82,6 +94,33 @@ def compile_pipe( return pipe +def fuse_qkv_projections_in_pipe(pipe): + if hasattr(pipe, "fuse_qkv_projections"): + pipe.fuse_qkv_projections() + return pipe + + +def convert_pipe_to_memory_format(pipe, *, ignores=(), memory_format=torch.preserve_format): + from nexfort.utils.attributes import multi_recursive_apply + from nexfort.utils.memory_format import apply_memory_format + import functools + if memory_format == torch.preserve_format: + return pipe + + parts = [ + "unet", + "controlnet", + "fast_unet", # for deepcache + "prior", # for StableCascadePriorPipeline + "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline + "vqgan", # for StableCascadeDecoderPipeline + "vae", + ] + multi_recursive_apply( + pipe, parts, functools.partial(apply_memory_format, memory_format=memory_format), ignores=ignores, verbose=True + ) + return pipe def save_pipe(pipe, dir="cached_pipe", *, ignores=(), overwrite=True): if not os.path.exists(dir): diff --git a/onediff_diffusers_extensions/onediffx/lora/__init__.py b/onediff_diffusers_extensions/onediffx/lora/__init__.py index 24b78f93d..5d99001bc 100644 --- a/onediff_diffusers_extensions/onediffx/lora/__init__.py +++ b/onediff_diffusers_extensions/onediffx/lora/__init__.py @@ -6,4 +6,4 @@ get_active_adapters, ) -from onediff.infer_compiler.utils.param_utils import update_graph_with_constant_folding_info +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_with_constant_folding_info diff --git a/onediff_diffusers_extensions/onediffx/lora/lora.py b/onediff_diffusers_extensions/onediffx/lora/lora.py index f5bb290b4..8e7896094 100644 --- a/onediff_diffusers_extensions/onediffx/lora/lora.py +++ b/onediff_diffusers_extensions/onediffx/lora/lora.py @@ -5,7 +5,7 @@ import torch -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger import diffusers from diffusers.loaders import LoraLoaderMixin diff --git a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py index a0bdf76d0..df8f17ebe 100644 --- a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py +++ b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py @@ -19,7 +19,7 @@ from diffusers.utils import is_accelerate_available from diffusers.models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from .utils import fuse_lora, get_adapter_names diff --git a/onediff_diffusers_extensions/onediffx/lora/unet.py b/onediff_diffusers_extensions/onediffx/lora/unet.py index cca033aa1..98834eeaa 100644 --- a/onediff_diffusers_extensions/onediffx/lora/unet.py +++ b/onediff_diffusers_extensions/onediffx/lora/unet.py @@ -4,7 +4,7 @@ import torch from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from diffusers.models.lora import ( LoRACompatibleConv, LoRACompatibleLinear, diff --git a/onediff_diffusers_extensions/onediffx/lora/utils.py b/onediff_diffusers_extensions/onediffx/lora/utils.py index 49fe2aca2..89b029d45 100644 --- a/onediff_diffusers_extensions/onediffx/lora/utils.py +++ b/onediff_diffusers_extensions/onediffx/lora/utils.py @@ -14,13 +14,13 @@ else: is_peft_available = lambda: False 
-from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_related_tensor if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection else: from diffusers.models.lora import PatchedLoraProjection -from onediff.infer_compiler.oneflow.dual_module import DualModule +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection diff --git a/onediff_diffusers_extensions/tests/profile_lora.py b/onediff_diffusers_extensions/tests/profile_lora.py index 1bf310aee..1ecdc3535 100644 --- a/onediff_diffusers_extensions/tests/profile_lora.py +++ b/onediff_diffusers_extensions/tests/profile_lora.py @@ -7,7 +7,7 @@ from diffusers import DiffusionPipeline from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora _time = None diff --git a/onediff_diffusers_extensions/tests/profile_multi_lora.py b/onediff_diffusers_extensions/tests/profile_multi_lora.py index 88b3d7cde..e50b6a750 100644 --- a/onediff_diffusers_extensions/tests/profile_multi_lora.py +++ b/onediff_diffusers_extensions/tests/profile_multi_lora.py @@ -8,7 +8,7 @@ from diffusers.utils.constants import USE_PEFT_BACKEND from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora, set_and_fuse_adapters if not USE_PEFT_BACKEND: diff --git a/onediff_sd_webui_extensions/api_examples/img2img.py b/onediff_sd_webui_extensions/api_examples/img2img.py index 4b5e6ee79..1512ba8b8 100644 --- a/onediff_sd_webui_extensions/api_examples/img2img.py +++ b/onediff_sd_webui_extensions/api_examples/img2img.py @@ -6,13 +6,13 @@ # And if you are using OneDiff Enterprise, add another # `"script_args" : [{"0": True}]` field to enable quantization -from datetime import datetime -from pathlib import Path -import urllib.request import base64 import json -import time import os +import time +import urllib.request +from datetime import datetime +from pathlib import Path webui_server_url = "http://127.0.0.1:7860" diff --git a/onediff_sd_webui_extensions/api_examples/txt2img.py b/onediff_sd_webui_extensions/api_examples/txt2img.py index 2ba72960e..9cb4b2be5 100644 --- a/onediff_sd_webui_extensions/api_examples/txt2img.py +++ b/onediff_sd_webui_extensions/api_examples/txt2img.py @@ -6,12 +6,12 @@ # And if you are using OneDiff Enterprise, add another # `"script_args" : [{"0": True}]` field to enable quantization -from datetime import datetime -import urllib.request import base64 import json -import time import os +import time +import urllib.request +from datetime import datetime webui_server_url = "http://127.0.0.1:7860" @@ -57,7 +57,6 @@ def call_txt2img_api(**payload): "width": 1024, "height": 1024, "cfg_scale": 7, - "sampler_name": "DPM++ 2M Karras", "n_iter": 1, "batch_size": 1, # Enable OneDiff speed up diff --git a/onediff_sd_webui_extensions/compile_ldm.py b/onediff_sd_webui_extensions/compile_ldm.py index e6a3aec06..e87f7f696 100644 --- a/onediff_sd_webui_extensions/compile_ldm.py +++ b/onediff_sd_webui_extensions/compile_ldm.py @@ -1,8 +1,6 @@ import os 
-import oneflow as flow -from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +import oneflow as flow from ldm.modules.attention import ( BasicTransformerBlock, CrossAttention, @@ -17,6 +15,9 @@ timestep_embedding, ) +from onediff.infer_compiler import oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register + __all__ = ["compile_ldm_unet"] diff --git a/onediff_sd_webui_extensions/compile_sgm.py b/onediff_sd_webui_extensions/compile_sgm.py index 12398a737..154b3dc5c 100644 --- a/onediff_sd_webui_extensions/compile_sgm.py +++ b/onediff_sd_webui_extensions/compile_sgm.py @@ -1,16 +1,20 @@ import oneflow as flow -from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register from sd_webui_onediff_utils import ( CrossAttentionOflow, GroupNorm32Oflow, timestep_embedding, ) -from sgm.modules.attention import CrossAttention, SpatialTransformer -from sgm.modules.diffusionmodules.openaimodel import UNetModel, ResBlock -from sgm.modules.attention import BasicTransformerBlock +from sgm.modules.attention import ( + BasicTransformerBlock, + CrossAttention, + SpatialTransformer, +) +from sgm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel from sgm.modules.diffusionmodules.util import GroupNorm32 +from onediff.infer_compiler import oneflow_compile +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register + __all__ = ["compile_sgm_unet"] diff --git a/onediff_sd_webui_extensions/compile_vae.py b/onediff_sd_webui_extensions/compile_vae.py index d5c9c7f26..f3dd03204 100644 --- a/onediff_sd_webui_extensions/compile_vae.py +++ b/onediff_sd_webui_extensions/compile_vae.py @@ -1,22 +1,27 @@ from modules import shared -from modules.sd_vae_approx import model as get_vae_model, sd_vae_approx_models from modules.sd_vae_approx import VAEApprox +from modules.sd_vae_approx import model as get_vae_model +from modules.sd_vae_approx import sd_vae_approx_models + from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register __all__ = ["VaeCompileCtx"] compiled_models = {} + class VAEApproxOflow(proxy_class(VAEApprox)): pass + torch2oflow_class_map = { VAEApprox: VAEApproxOflow, } register(package_names=["modules"], torch2oflow_class_map=torch2oflow_class_map) + class VaeCompileCtx(object): def __init__(self, options=None): self._options = options diff --git a/onediff_sd_webui_extensions/onediff_hijack.py b/onediff_sd_webui_extensions/onediff_hijack.py index f2683ac42..c8da677c6 100644 --- a/onediff_sd_webui_extensions/onediff_hijack.py +++ b/onediff_sd_webui_extensions/onediff_hijack.py @@ -1,6 +1,7 @@ -import oneflow import compile_ldm import compile_sgm +import oneflow + # https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/1c0a0c4c26f78c32095ebc7f8af82f5c04fca8c0/modules/sd_hijack_unet.py#L8 class OneFlowHijackForUnet: @@ -8,12 +9,15 @@ class OneFlowHijackForUnet: This is oneflow, but with cat that resizes tensors to appropriate dimensions if they do not match; this makes it possible to create pictures with dimensions that are multiples of 8 rather than 64 """ + def __getattr__(self, item): - if item == 'cat': + if item == "cat": return self.cat if hasattr(oneflow, item): return getattr(oneflow, item) - raise AttributeError(f"'{type(self).__name__}' object has no 
attribute '{item}'") + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{item}'" + ) def cat(self, tensors, *args, **kwargs): if len(tensors) == 2: @@ -22,11 +26,13 @@ def cat(self, tensors, *args, **kwargs): tensors = (a, b) return oneflow.cat(tensors, *args, **kwargs) + hijack_flow = OneFlowHijackForUnet() + def unload_model_weights(sd_model=None, info=None): - from modules import lowvram, devices - from modules import shared + from modules import devices, lowvram, shared + m = sd_model or shared.sd_model if m.lowvram: lowvram.send_everything_to_cpu() @@ -35,10 +41,12 @@ def unload_model_weights(sd_model=None, info=None): devices.torch_gc() return sd_model + def send_model_to_cpu(m): # do nothing pass + def hijack_function(module, name, new_name, new_value): # restore original function in case of reload unhijack_function(module=module, name=name, new_name=new_name) @@ -51,35 +59,39 @@ def unhijack_function(module, name, new_name): setattr(module, name, getattr(module, new_name)) delattr(module, new_name) + def do_hijack(): compile_ldm.flow = hijack_flow compile_sgm.flow = hijack_flow - from modules import sd_models, script_callbacks + from modules import script_callbacks, sd_models + script_callbacks.on_script_unloaded(undo_hijack) hijack_function( module=sd_models, - name='unload_model_weights', - new_name='__onediff_original_unload_model_weights', + name="unload_model_weights", + new_name="__onediff_original_unload_model_weights", new_value=unload_model_weights, ) hijack_function( module=sd_models, - name='send_model_to_cpu', - new_name='__onediff_original_send_model_to_cpu', + name="send_model_to_cpu", + new_name="__onediff_original_send_model_to_cpu", new_value=send_model_to_cpu, ) + def undo_hijack(): compile_ldm.flow = oneflow compile_sgm.flow = oneflow from modules import sd_models + unhijack_function( module=sd_models, - name='unload_model_weights', - new_name='__onediff_original_unload_model_weights', + name="unload_model_weights", + new_name="__onediff_original_unload_model_weights", ) unhijack_function( module=sd_models, - name='send_model_to_cpu', - new_name='__onediff_original_send_model_to_cpu', + name="send_model_to_cpu", + new_name="__onediff_original_send_model_to_cpu", ) diff --git a/onediff_sd_webui_extensions/onediff_lora.py b/onediff_sd_webui_extensions/onediff_lora.py index 77066873f..0bee88e9d 100644 --- a/onediff_sd_webui_extensions/onediff_lora.py +++ b/onediff_sd_webui_extensions/onediff_lora.py @@ -1,6 +1,9 @@ import torch + from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import ( + update_graph_related_tensor, +) class HijackLoraActivate: diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py index 3c7e887cd..5e5766c04 100644 --- a/onediff_sd_webui_extensions/scripts/onediff.py +++ b/onediff_sd_webui_extensions/scripts/onediff.py @@ -1,37 +1,42 @@ import os -import zipfile import warnings -import gradio as gr +import zipfile from pathlib import Path -from typing import Union, Dict +from typing import Dict, Union + +import gradio as gr import modules.scripts as scripts import modules.shared as shared -from modules.sd_models import select_checkpoint -from modules.processing import process_images -from modules.ui_common import create_refresh_button -from modules import script_callbacks - -from ui_utils import hints_message, 
get_all_compiler_caches, refresh_all_compiler_caches, all_compiler_caches_path -from compile_ldm import compile_ldm_unet, SD21CompileCtx +from compile_ldm import SD21CompileCtx, compile_ldm_unet from compile_sgm import compile_sgm_unet from compile_vae import VaeCompileCtx -from onediff_lora import HijackLoraActivate +from modules import script_callbacks +from modules.processing import process_images +from modules.sd_models import select_checkpoint +from modules.ui_common import create_refresh_button from onediff_hijack import do_hijack as onediff_do_hijack +from onediff_lora import HijackLoraActivate +from oneflow import __version__ as oneflow_version +from ui_utils import ( + all_compiler_caches_path, + get_all_compiler_caches, + hints_message, + refresh_all_compiler_caches, +) -from onediff.infer_compiler.utils.log_utils import logger +from onediff import __version__ as onediff_version from onediff.optimization.quant_optimizer import ( quantize_model, varify_can_use_quantization, ) -from onediff.infer_compiler.utils.env_var import parse_boolean_from_env -from onediff import __version__ as onediff_version -from oneflow import __version__ as oneflow_version +from onediff.utils import logger, parse_boolean_from_env """oneflow_compiled UNetModel""" compiled_unet = None is_unet_quantized = False compiled_ckpt_name = None + def generate_graph_path(ckpt_name: str, model_name: str) -> str: base_output_dir = shared.opts.outdir_samples or shared.opts.outdir_txt2img_samples save_ckpt_graphs_path = os.path.join(base_output_dir, "graphs", ckpt_name) @@ -119,14 +124,29 @@ def ui(self, is_img2img): """ with gr.Row(): # TODO: set choices as Tuple[str, str] after the version of gradio specified webui upgrades - compiler_cache = gr.Dropdown(label="Compiler caches (Beta)", choices=["None"] + get_all_compiler_caches(), value="None", elem_id="onediff_compiler_cache") - refresh_button = create_refresh_button(compiler_cache, refresh_all_compiler_caches, lambda: {"choices": ["None"] + get_all_compiler_caches()}, "onediff_refresh_compiler_caches") + compiler_cache = gr.Dropdown( + label="Compiler caches (Beta)", + choices=["None"] + get_all_compiler_caches(), + value="None", + elem_id="onediff_compiler_cache", + ) + create_refresh_button( + compiler_cache, + refresh_all_compiler_caches, + lambda: {"choices": ["None"] + get_all_compiler_caches()}, + "onediff_refresh_compiler_caches", + ) save_cache_name = gr.Textbox(label="Saved cache name (Beta)") with gr.Row(): - always_recompile = gr.components.Checkbox(label="always_recompile", visible=parse_boolean_from_env("ONEDIFF_DEBUG")) - if not varify_can_use_quantization(): - gr.HTML(hints_message) - is_quantized = gr.components.Checkbox(label="Model Quantization(int8) Speed Up", visible=varify_can_use_quantization()) + always_recompile = gr.components.Checkbox( + label="always_recompile", + visible=parse_boolean_from_env("ONEDIFF_DEBUG"), + ) + gr.HTML(hints_message, elem_id="hintMessage", visible=not varify_can_use_quantization()) + is_quantized = gr.components.Checkbox( + label="Model Quantization(int8) Speed Up", + visible=varify_can_use_quantization(), + ) return [is_quantized, compiler_cache, save_cache_name, always_recompile] def show(self, is_img2img): @@ -143,7 +163,7 @@ def get_model_type(model): "is_ssd": model.is_ssd, } - if self.current_type == None: + if self.current_type is None: is_changed = True else: for key, v in self.current_type.items(): @@ -151,11 +171,18 @@ def get_model_type(model): is_changed = True break - if is_changed == True: + if 
is_changed: self.current_type = get_model_type(model) return is_changed - def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", always_recompile=False): + def run( + self, + p, + quantization=False, + compiler_cache=None, + saved_cache_name="", + always_recompile=False, + ): global compiled_unet, compiled_ckpt_name, is_unet_quantized current_checkpoint = shared.opts.sd_model_checkpoint @@ -165,9 +192,11 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a model_changed = self.check_model_change(shared.sd_model) quantization_changed = quantization != is_unet_quantized need_recompile = ( - (quantization and ckpt_changed) # always recompile when switching ckpt with 'int8 speed model' enabled - or model_changed # always recompile when switching model to another structure - or quantization_changed # always recompile when switching model from non-quantized to quantized (and vice versa) + ( + quantization and ckpt_changed + ) # always recompile when switching ckpt with 'int8 speed model' enabled + or model_changed # always recompile when switching model to another structure + or quantization_changed # always recompile when switching model from non-quantized to quantized (and vice versa) or always_recompile ) @@ -178,16 +207,23 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a original_diffusion_model, quantization=quantization ) - if compiler_cache != "None": + # The gradio version pinned by sd-webui makes the compiler cache dropdown always return a string + if compiler_cache not in [None, "None"]: compiler_cache_path = all_compiler_caches_path() + f"/{compiler_cache}" if not Path(compiler_cache_path).exists(): - raise FileNotFoundError(f"Cannot find cache {compiler_cache_path}, please make sure it exists") + raise FileNotFoundError( + f"Cannot find cache {compiler_cache_path}, please make sure it exists" + ) try: compiled_unet.load_graph(compiler_cache_path, run_warmup=True) - except zipfile.BadZipFile as e: - raise RuntimeError("Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui") + except zipfile.BadZipFile: + raise RuntimeError( + "Load cache failed. Please make sure that the --disable-safe-unpickle parameter is added when starting the webui" + ) except Exception as e: - raise RuntimeError("Load cache failed. Please make sure cache has the same sd version (or unet architure) with current checkpoint") + raise RuntimeError( + f"Load cache failed ({e}). Please make sure the cache has the same sd version (or unet architecture) as the current checkpoint" + ) else: logger.info( @@ -199,8 +235,10 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a if saved_cache_name != "": if not os.access(str(all_compiler_caches_path()), os.W_OK): - raise PermissionError(f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. \ - Please change it in the settings to a directory with write permissions") + raise PermissionError( + f"The directory {all_compiler_caches_path()} does not have write permissions, and compiler cache cannot be written to this directory. 
\ + Please change it in the settings to a directory with write permissions" + ) if not Path(all_compiler_caches_path()).exists(): Path(all_compiler_caches_path()).mkdir() saved_cache_name = all_compiler_caches_path() + f"/{saved_cache_name}" @@ -209,10 +247,18 @@ def run(self, p, quantization=False, compiler_cache=None, saved_cache_name="", a return proc + def on_ui_settings(): - section = ('onediff', "OneDiff") - shared.opts.add_option("onediff_compiler_caches_path", shared.OptionInfo( - str(Path(__file__).parent.parent / "compiler_caches"), "Directory for onediff compiler caches", section=section)) + section = ("onediff", "OneDiff") + shared.opts.add_option( + "onediff_compiler_caches_path", + shared.OptionInfo( + str(Path(__file__).parent.parent / "compiler_caches"), + "Directory for onediff compiler caches", + section=section, + ), + ) + script_callbacks.on_ui_settings(on_ui_settings) onediff_do_hijack() diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py index 3b95b837c..19378d59c 100644 --- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py +++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sd.py @@ -4,7 +4,10 @@ # *Only* converts the UNet, VAE, and Text Encoder. # Does not convert optimizer state or any other thing. -__all__ = ["convert_sd", "convert_unet_calibrate_info_sd"] +__all__ = [ + # "convert_sd", + "convert_unet_calibrate_info_sd", +] import argparse import os.path as osp @@ -14,7 +17,6 @@ import torch from safetensors.torch import load_file, save_file - # =================# # UNet Conversion # # =================# @@ -304,6 +306,7 @@ def convert_text_enc_state_dict_v20(text_enc_dict): def convert_text_enc_state_dict(text_enc_dict): return text_enc_dict + def convert_unet_calibrate_dict(state_dict) -> str: mapping = {k: k for k in state_dict} remove_suffix = ( @@ -345,14 +348,31 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): for name, info in dst_info.items(): f.write(f"{name} {info}\n") + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.") - parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.") - parser.add_argument("--half", action="store_true", help="Save weights in half precision.") parser.add_argument( - "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt." + "--model_path", + default=None, + type=str, + required=True, + help="Path to the model to convert.", + ) + parser.add_argument( + "--checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the output model.", + ) + parser.add_argument( + "--half", action="store_true", help="Save weights in half precision." + ) + parser.add_argument( + "--use_safetensors", + action="store_true", + help="Save weights use safetensors, default is ckpt.", ) args = parser.parse_args() @@ -387,7 +407,9 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # Convert the UNet model unet_state_dict = convert_unet_state_dict(unet_state_dict) - unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + unet_state_dict = { + "model.diffusion_model." 
+ k: v for k, v in unet_state_dict.items() + } # Convert the VAE model vae_state_dict = convert_vae_state_dict(vae_state_dict) @@ -400,10 +422,14 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()} text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict) - text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "cond_stage_model.model." + k: v for k, v in text_enc_dict.items() + } else: text_enc_dict = convert_text_enc_state_dict(text_enc_dict) - text_enc_dict = {"cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items() + } # Put together new checkpoint state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict} @@ -416,8 +442,13 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): state_dict = {"state_dict": state_dict} torch.save(state_dict, args.checkpoint_path) - calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" - convert_unet_calibrate_info_sd(args.model_path + "/calibrate_info.txt", calibrate_info_save_path) + calibrate_info_save_path = ( + Path(args.checkpoint_path).parent + / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" + ) + convert_unet_calibrate_info_sd( + args.model_path + "/calibrate_info.txt", calibrate_info_save_path + ) # def get_unet_state_dict(model_path): # unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") @@ -490,4 +521,3 @@ def convert_unet_calibrate_info_sd(calibration_path, dst_path): # else: # state_dict = {"state_dict": state_dict} # torch.save(state_dict, checkpoint_path) - diff --git a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py index 7284c9293..e97c5849f 100644 --- a/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py +++ b/onediff_sd_webui_extensions/tools/convert_diffusers_to_sdxl.py @@ -4,7 +4,10 @@ # *Only* converts the UNet, VAE, and Text Encoder. # Does not convert optimizer state or any other thing. -__all__ = ["convert_sdxl", "convert_unet_calibrate_info_sdxl"] +__all__ = [ + # "convert_sdxl", + "convert_unet_calibrate_info_sdxl", +] import argparse import os.path as osp @@ -14,7 +17,6 @@ import torch from safetensors.torch import load_file, save_file - # =================# # UNet Conversion # # =================# @@ -285,6 +287,7 @@ def convert_openclip_text_enc_state_dict(text_enc_dict): def convert_openai_text_enc_state_dict(text_enc_dict): return text_enc_dict + def convert_unet_calibrate_dict(state_dict) -> str: mapping = {k: k for k in state_dict} remove_suffix = ( @@ -333,11 +336,27 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model_path", default=None, type=str, required=True, help="Path to the model to convert.") - parser.add_argument("--checkpoint_path", default=None, type=str, required=True, help="Path to the output model.") - parser.add_argument("--half", action="store_true", help="Save weights in half precision.") parser.add_argument( - "--use_safetensors", action="store_true", help="Save weights use safetensors, default is ckpt." 
+ "--model_path", + default=None, + type=str, + required=True, + help="Path to the model to convert.", + ) + parser.add_argument( + "--checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the output model.", + ) + parser.add_argument( + "--half", action="store_true", help="Save weights in half precision." + ) + parser.add_argument( + "--use_safetensors", + action="store_true", + help="Save weights use safetensors, default is ckpt.", ) args = parser.parse_args() @@ -374,12 +393,16 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): if osp.exists(text_enc_2_path): text_enc_2_dict = load_file(text_enc_2_path, device="cpu") else: - text_enc_2_path = osp.join(args.model_path, "text_encoder_2", "pytorch_model.bin") + text_enc_2_path = osp.join( + args.model_path, "text_encoder_2", "pytorch_model.bin" + ) text_enc_2_dict = torch.load(text_enc_2_path, map_location="cpu") # Convert the UNet model unet_state_dict = convert_unet_state_dict(unet_state_dict) - unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + unet_state_dict = { + "model.diffusion_model." + k: v for k, v in unet_state_dict.items() + } # Convert the VAE model vae_state_dict = convert_vae_state_dict(vae_state_dict) @@ -387,19 +410,30 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): # Convert text encoder 1 text_enc_dict = convert_openai_text_enc_state_dict(text_enc_dict) - text_enc_dict = {"conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = { + "conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items() + } # Convert text encoder 2 text_enc_2_dict = convert_openclip_text_enc_state_dict(text_enc_2_dict) - text_enc_2_dict = {"conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()} + text_enc_2_dict = { + "conditioner.embedders.1.model." 
+ k: v for k, v in text_enc_2_dict.items() + } # We call the `.T.contiguous()` to match what's done in # https://github.com/huggingface/diffusers/blob/84905ca7287876b925b6bf8e9bb92fec21c78764/src/diffusers/loaders/single_file_utils.py#L1085 - text_enc_2_dict["conditioner.embedders.1.model.text_projection"] = text_enc_2_dict.pop( + text_enc_2_dict[ + "conditioner.embedders.1.model.text_projection" + ] = text_enc_2_dict.pop( "conditioner.embedders.1.model.text_projection.weight" ).T.contiguous() # Put together new checkpoint - state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict, **text_enc_2_dict} + state_dict = { + **unet_state_dict, + **vae_state_dict, + **text_enc_dict, + **text_enc_2_dict, + } if args.half: state_dict = {k: v.half() for k, v in state_dict.items()} @@ -410,8 +444,13 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): state_dict = {"state_dict": state_dict} torch.save(state_dict, args.checkpoint_path) - calibrate_info_save_path = Path(args.checkpoint_path).parent / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" - convert_unet_calibrate_info_sdxl(args.model_path + "/calibrate_info.txt", calibrate_info_save_path) + calibrate_info_save_path = ( + Path(args.checkpoint_path).parent + / f"{Path(args.checkpoint_path).stem}_sd_calibrate_info.txt" + ) + convert_unet_calibrate_info_sdxl( + args.model_path + "/calibrate_info.txt", calibrate_info_save_path + ) # def get_unet_state_dict(model_path): @@ -497,4 +536,3 @@ def convert_unet_calibrate_info_sdxl(calibration_path, dst_path): # else: # state_dict = {"state_dict": state_dict} # torch.save(state_dict, checkpoint_path) - diff --git a/onediff_sd_webui_extensions/ui_utils.py b/onediff_sd_webui_extensions/ui_utils.py index 7feea4eaa..7e442be4a 100644 --- a/onediff_sd_webui_extensions/ui_utils.py +++ b/onediff_sd_webui_extensions/ui_utils.py @@ -1,42 +1,48 @@ from pathlib import Path +from textwrap import dedent -hints_message = """ -
-  [HTML markup of the hint banner was lost during text extraction; its visible text:]
-  Hints Message
-  Hints: Enterprise function is not supported on your system.
-  If you need Enterprise Level Support for your system or business, please send an email to
-  business@siliconflow.com.
-  Tell us about your use case, deployment scale, and requirements.
-  GitHub Issue:
-  https://github.com/siliconflow/onediff/issues
-  [closing markup lost during extraction]
-"""
+hints_message = dedent("""\
+  [same HTML banner, re-indented for textwrap.dedent; markup lost during extraction, visible text unchanged:]
+  Hints Message
+  Hints: Enterprise function is not supported on your system.
+  If you need Enterprise Level Support for your system or business, please send an email to
+  business@siliconflow.com.
+  Tell us about your use case, deployment scale, and requirements.
+  GitHub Issue:
+  https://github.com/siliconflow/onediff/issues
+  [closing markup lost during extraction]
+""") all_compiler_caches = [] + def all_compiler_caches_path(): import modules.shared as shared + caches_path = Path(shared.opts.onediff_compiler_caches_path) if not caches_path.exists(): caches_path.mkdir(parents=True) return shared.opts.onediff_compiler_caches_path + def get_all_compiler_caches(): global all_compiler_caches if len(all_compiler_caches) == 0: refresh_all_compiler_caches() return all_compiler_caches + def refresh_all_compiler_caches(path: Path = None): global all_compiler_caches path = path or all_compiler_caches_path() - all_compiler_caches = [f.stem for f in Path(path).iterdir() if f.is_file()] \ No newline at end of file + all_compiler_caches = [f.stem for f in Path(path).iterdir() if f.is_file()] diff --git a/src/infer_compiler_registry/register_diffusers/__init__.py b/src/infer_compiler_registry/register_diffusers/__init__.py index 98f15954a..0709a45ce 100644 --- a/src/infer_compiler_registry/register_diffusers/__init__.py +++ b/src/infer_compiler_registry/register_diffusers/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from packaging import version import importlib.metadata @@ -15,11 +15,19 @@ if diffusers_version < version.parse("0.26.00"): from diffusers.models.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformer_2d import Transformer2DModel else: from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unets.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unets.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformers.transformer_2d import Transformer2DModel if diffusers_version >= version.parse("0.25.00"): @@ -34,7 +42,9 @@ from diffusers.models.unets.unet_spatio_temporal_condition import ( UNetSpatioTemporalConditionModel, ) - from diffusers.models.transformers.transformer_temporal import TransformerSpatioTemporalModel + from diffusers.models.transformers.transformer_temporal import ( + TransformerSpatioTemporalModel, + ) else: from diffusers.models.transformer_temporal import TransformerSpatioTemporalModel from diffusers.models.unet_spatio_temporal_condition import ( @@ -47,8 +57,10 @@ ) else: from diffusers.models.autoencoder_kl_temporal_decoder import TemporalDecoder - - from .spatio_temporal_oflow import SpatioTemporalResBlock as SpatioTemporalResBlockOflow + + from .spatio_temporal_oflow import ( + SpatioTemporalResBlock as SpatioTemporalResBlockOflow, + ) from .spatio_temporal_oflow import TemporalDecoder as TemporalDecoderOflow from .spatio_temporal_oflow import ( TransformerSpatioTemporalModel as TransformerSpatioTemporalModelOflow, diff --git a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py index a8b0e571e..8bf0a367e 100644 --- a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py @@ -22,7 +22,7 @@ import diffusers from diffusers.utils import deprecate, logging -from onediff.infer_compiler.utils import parse_boolean_from_env, set_boolean_env_var +from onediff.utils import 
parse_boolean_from_env, set_boolean_env_var def is_xformers_available():
diff --git a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py index 5e33c9970..3133cabab 100644 --- a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers")
diff --git a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py index 12dbb49d2..fd4aacb54 100644 --- a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py @@ -30,7 +30,7 @@ if diffusers_version >= diffusers_0240_v: - from onediff.infer_compiler.transform import transform_mgr + from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers")
diff --git a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py index f343356b8..e07371411 100644 --- a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py @@ -6,7 +6,7 @@ import oneflow as torch import oneflow.nn.functional as F from oneflow import nn -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers") @@ -968,7 +968,9 @@ def forward( if diffusers_version >= diffusers_0270_v: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning( + "Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored." + ) # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. 
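The hunks above all make the same mechanical move: helpers that previously lived under `onediff.infer_compiler.transform` and `onediff.infer_compiler.utils` now resolve through `onediff.infer_compiler.backends.oneflow.*` and `onediff.utils`. A minimal sketch of what this migration means for downstream code is below; it uses only names that appear in this diff, assumes onediff is installed with its oneflow backend, and the `pipe` object is hypothetical.

```python
# Old import layout, removed throughout this diff:
#   from onediff.infer_compiler.transform import register, proxy_class, transform_mgr
#   from onediff.infer_compiler.utils.log_utils import logger
#   from onediff.infer_compiler.utils import parse_boolean_from_env
#   from onediff.infer_compiler import compile_options  # module-level global

# New import layout introduced by this diff:
from onediff.infer_compiler import oneflow_compile, OneflowCompileOptions
from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
from onediff.utils import logger, parse_boolean_from_env

# The removed global `compile_options` is replaced by a per-call options object;
# `use_graph` and `max_cached_graph_size` are fields of the OneflowCompileOptions
# dataclass shown later in this diff (env_var.py).
options = OneflowCompileOptions()
options.use_graph = True
options.max_cached_graph_size = 9

# compiled_unet = oneflow_compile(pipe.unet, options=options)  # `pipe` is hypothetical
logger.info("oneflow backend import paths resolved")
```

This matches the switch the example scripts above make from the removed module-level `compile_options` to constructing an `OneflowCompileOptions` instance per compile call.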
diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py index 86234c5c7..54ae20ae3 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) @@ -70,7 +70,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, @@ -236,7 +238,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py index c36303415..d87724d07 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) diff --git a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py index d8bf735f9..fb2028b40 100644 --- a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py +++ b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import diffusers_enterprise_lite diff --git a/src/infer_compiler_registry/register_onediff_quant/__init__.py b/src/infer_compiler_registry/register_onediff_quant/__init__.py index e9ab3afd8..dd5a37a26 100644 --- a/src/infer_compiler_registry/register_onediff_quant/__init__.py +++ b/src/infer_compiler_registry/register_onediff_quant/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import onediff_quant diff --git a/src/onediff/infer_compiler/__init__.py b/src/onediff/infer_compiler/__init__.py index bff98d894..7110e897e 100644 --- a/src/onediff/infer_compiler/__init__.py +++ b/src/onediff/infer_compiler/__init__.py @@ -1,10 +1,4 @@ import os import torch -from .core import * -from .utils import set_default_env_vars -from .utils.options import CompileOptions -from 
.utils.options import _GLOBAL_compile_options as compile_options - - -set_default_env_vars() +from .backends import *
diff --git a/src/onediff/infer_compiler/backends/__init__.py b/src/onediff/infer_compiler/backends/__init__.py index e69de29bb..bbaef1412 100644 --- a/src/onediff/infer_compiler/backends/__init__.py +++ b/src/onediff/infer_compiler/backends/__init__.py @@ -0,0 +1,4 @@ +from .deployable_module import DeployableModule +from .compiler import compile +from .compiler import oneflow_compile +from .oneflow import OneflowCompileOptions
diff --git a/src/onediff/infer_compiler/core/with_onediff_compile.py b/src/onediff/infer_compiler/backends/compiler.py similarity index 90% rename from src/onediff/infer_compiler/core/with_onediff_compile.py rename to src/onediff/infer_compiler/backends/compiler.py index 3ab038162..4bf91bb83 100644 --- a/src/onediff/infer_compiler/core/with_onediff_compile.py +++ b/src/onediff/infer_compiler/backends/compiler.py @@ -1,12 +1,14 @@ import torch + from .deployable_module import DeployableModule _DEFAULT_BACKEND = "oneflow" + def compile( torch_module: torch.nn.Module, *, backend=_DEFAULT_BACKEND, options=None ) -> DeployableModule: - from ..backends.registry import lookup_backend + from .registry import lookup_backend backend = lookup_backend(backend) model = backend(torch_module, options=options)
diff --git a/src/onediff/infer_compiler/core/deployable_module.py b/src/onediff/infer_compiler/backends/deployable_module.py similarity index 100% rename from src/onediff/infer_compiler/core/deployable_module.py rename to src/onediff/infer_compiler/backends/deployable_module.py
diff --git a/src/onediff/infer_compiler/backends/nexfort.py b/src/onediff/infer_compiler/backends/nexfort.py deleted file mode 100644 index 67cca8cbc..000000000 --- a/src/onediff/infer_compiler/backends/nexfort.py +++ /dev/null @@ -1,32 +0,0 @@ -import dataclasses -import torch -from .registry import register_backend - - -def make_inductor_options(options): - inductor_options = {} - if options is None: - return inductor_options - for filed in dataclasses.fields(options): - filed_name = filed.name - inductor_options[f"inductor.{filed_name}"] = getattr(options, filed_name) - return inductor_options - - -@register_backend("nexfort") -def compile(torch_module: torch.nn.Module, *, options=None): - from nexfort.utils.memory_format import apply_memory_format - from nexfort.compilers import nexfort_compile - from ..nexfort.deployable_module import NexfortDeployableModule - from ..utils import CompileOptions - - options = options if options is not None else CompileOptions() - nexfort_options = options.nexfort - if nexfort_options.memory_format != torch.preserve_format: - model = apply_memory_format( - torch_module, memory_format=nexfort_options.memory_format - ) - model = nexfort_compile( - model, options=make_inductor_options(nexfort_options.inductor) - ) - return NexfortDeployableModule(model)
diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md new file mode 100644 index 000000000..a36f38d5c --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -0,0 +1,33 @@ +## nexfort backend for the onediff compiler +### Dependency +``` +pip3 install --pre -U torch==2.4.0.dev20240507 torchaudio==2.2.0.dev20240507+cu124 torchvision==0.19.0.dev20240507+cu124 --index-url https://download.pytorch.org/whl/nightly/cu124 +pip3 install -U torchao==0.1 +``` + +### Install nexfort + +Before installing nexfort, please make 
sure that the corresponding PyTorch and CUDA environments are installed. + +``` +# PyTorch 2.3.0, CUDA 12.1 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch230cu121-cp310-cp310-manylinux2014_x86_64.whl + +# PyTorch 2.4.0, CUDA 12.1 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch240dev20240507cu121-cp310-cp310-manylinux2014_x86_64.whl + +# PyTorch 2.4.0, CUDA 12.4 +pip3 install https://nexfort-releases.oss-cn-hangzhou.aliyuncs.com/nexfort-0.1.dev215%2Btorch240dev20240507cu124-cp310-cp310-manylinux2014_x86_64.whl +``` + +### Run pixart alpha (with nexfort backend) + +``` +# model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ +python3 ./benchmarks/text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --steps 20 --compiler nexfort --output-image ./pixart_alpha_nex.png +``` +Performance on NVIDIA A100-PCIE-40GB: +- Warmup time: 771.418s +- Inference time: 2.045s +- Iterations per second: 10.743 +- Max used CUDA memory: 13.855GiB diff --git a/src/onediff/infer_compiler/backends/nexfort/__init__.py b/src/onediff/infer_compiler/backends/nexfort/__init__.py new file mode 100644 index 000000000..1ea5f954e --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/__init__.py @@ -0,0 +1 @@ +from . import nexfort as _nexfort_backend diff --git a/src/onediff/infer_compiler/nexfort/deployable_module.py b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py similarity index 58% rename from src/onediff/infer_compiler/nexfort/deployable_module.py rename to src/onediff/infer_compiler/backends/nexfort/deployable_module.py index eb8a91be2..a9e94977e 100644 --- a/src/onediff/infer_compiler/nexfort/deployable_module.py +++ b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py @@ -1,12 +1,13 @@ import torch -from ..core.deployable_module import DeployableModule + +from ..deployable_module import DeployableModule class NexfortDeployableModule(DeployableModule): - def __init__(self, torch_module): + def __init__(self, compiled_module, torch_module): torch.nn.Module.__init__(self) - object.__setattr__(self, "_deployable_module_model", torch_module) - object.__setattr__(self, "_modules", torch_module._modules) + object.__setattr__(self, "_deployable_module_model", compiled_module) + object.__setattr__(self, "_modules", compiled_module._modules) object.__setattr__(self, "_torch_module", torch_module) def __call__(self, *args, **kwargs): diff --git a/src/onediff/infer_compiler/backends/nexfort/nexfort.py b/src/onediff/infer_compiler/backends/nexfort/nexfort.py new file mode 100644 index 000000000..d7f2fa69d --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/nexfort.py @@ -0,0 +1,17 @@ +import dataclasses +import torch +from ..registry import register_backend + + +@register_backend("nexfort") +def compile(torch_module: torch.nn.Module, *, options=None): + from nexfort.compilers import nexfort_compile + if isinstance(options, str): + import json + + # TODO(): using jsonschema to define the options schema + options = json.loads(options) + + nexfort_options = options if options is not None else dict() + compiled_model = nexfort_compile(torch_module, **nexfort_options) + return compiled_model diff --git a/src/onediff/infer_compiler/backends/oneflow/__init__.py b/src/onediff/infer_compiler/backends/oneflow/__init__.py new file mode 100644 index 000000000..69c5c9b11 --- /dev/null +++ 
b/src/onediff/infer_compiler/backends/oneflow/__init__.py @@ -0,0 +1,3 @@ +from . import oneflow as _oneflow_backend +from .deployable_module import OneflowDeployableModule +from .env_var import OneflowCompileOptions diff --git a/src/onediff/infer_compiler/utils/args_tree_util.py b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py similarity index 98% rename from src/onediff/infer_compiler/utils/args_tree_util.py rename to src/onediff/infer_compiler/backends/oneflow/args_tree_util.py index 598b95828..fb253e800 100644 --- a/src/onediff/infer_compiler/utils/args_tree_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py @@ -1,7 +1,7 @@ import torch import oneflow as flow from oneflow.framework.args_tree import ArgsTree -from .log_utils import logger +from onediff.utils import logger def input_output_processor(func): diff --git a/src/onediff/infer_compiler/oneflow/deployable_module.py b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py similarity index 72% rename from src/onediff/infer_compiler/oneflow/deployable_module.py rename to src/onediff/infer_compiler/backends/oneflow/deployable_module.py index 71db38870..dd73dd111 100644 --- a/src/onediff/infer_compiler/oneflow/deployable_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py @@ -1,18 +1,55 @@ import types import torch +from functools import wraps + import oneflow as flow -from ..core.deployable_module import DeployableModule -from ..transform.manager import transform_mgr -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.args_tree_util import input_output_processor -from ..utils.log_utils import logger -from ..utils.param_utils import parse_device, check_device, generate_constant_folding_info -from ..utils.graph_management_utils import graph_file_management -from ..utils.online_quantization_utils import quantize_and_deploy_wrapper -from ..utils.options import OneflowCompileOptions +from onediff.utils import logger + +from ..deployable_module import DeployableModule + +from .transform.manager import transform_mgr +from .transform.builtin_transform import torch2oflow + +from .dual_module import DualModule, get_mixed_dual_module +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled +from .args_tree_util import input_output_processor +from .param_utils import parse_device, check_device, generate_constant_folding_info +from .graph_management_utils import graph_file_management +from .online_quantization_utils import quantize_and_deploy_wrapper +from .env_var import OneflowCompileOptions + + +@torch2oflow.register +def _(mod: DualModule, verbose=False): + return torch2oflow(mod._torch_module, verbose) + + +def handle_deployable_exception(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if transform_mgr.debug_mode: + return func(self, *args, **kwargs) + else: + try: + return func(self, *args, **kwargs) + except Exception as e: + logger.error(f"Exception in {func.__name__}: {e=}") + logger.warning("Recompile oneflow module ...") + del self._deployable_module_model.oneflow_module + self._deployable_module_dpl_graph = None + return func(self, *args, **kwargs) + + return wrapper + + +def get_oneflow_graph(model, size=9, dynamic_graph=True): + from .graph import OneflowGraph -from .utils import handle_deployable_exception, get_mixed_dual_module, get_oneflow_graph + g = OneflowGraph(model) + g._dynamic_input_graph_cache.set_cache_size(size) + 
g._dynamic_input_graph_cache.enable_shared(dynamic_graph) + return g class OneflowDeployableModule(DeployableModule): @@ -199,3 +236,29 @@ def apply_online_quant(self, quant_config): >>> model.apply_online_quant(quant_config) """ self._deployable_module_quant_config = quant_config + + +def get_mixed_deployable_module(module_cls): + class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): + def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): + OneflowDeployableModule.__init__( + self, torch_module, oneflow_module, dynamic, options + ) + self._is_raw_deployable_module = False + + @classmethod + def from_existing(cls, existing_module, dynamic=True, options=None): + torch_module = existing_module._deployable_module_model._torch_module + oneflow_module = existing_module._deployable_module_model._oneflow_module + instance = cls(torch_module, oneflow_module, dynamic, options) + instance._deployable_module_dpl_graph = None + if hasattr(existing_module, "_deployable_module_dpl_graph"): + instance._deployable_module_dpl_graph = ( + existing_module._deployable_module_dpl_graph + ) + return instance + + def _get_name(self): + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedOneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py similarity index 85% rename from src/onediff/infer_compiler/oneflow/dual_module.py rename to src/onediff/infer_compiler/backends/oneflow/dual_module.py index 11a59ca18..903d814c7 100644 --- a/src/onediff/infer_compiler/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -7,9 +7,9 @@ import oneflow as flow from oneflow.utils.tensor import to_torch -from ..transform.builtin_transform import torch2oflow -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.log_utils import logger +from onediff.utils import logger +from .transform.builtin_transform import torch2oflow +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled class DualModule(torch.nn.Module): @@ -29,6 +29,7 @@ def oneflow_module(self): logger.debug(f"Convert {type(self._torch_module)} ...") self._oneflow_module = torch2oflow(self._torch_module) logger.debug(f"Convert {type(self._torch_module)} done!") + return self._oneflow_module @oneflow_module.deleter @@ -91,8 +92,6 @@ def __getattr__(self, name): return DualModuleList(torch_attr, oneflow_attr) elif isinstance(torch_attr, torch.nn.Module): - from .utils import get_mixed_dual_module - return get_mixed_dual_module(torch_attr.__class__)(torch_attr, oneflow_attr) else: return oneflow_attr if oneflow_exec_mode_enabled() else torch_attr @@ -101,6 +100,13 @@ def __setattr__(self, name: str, value: Any) -> None: if name in ["_torch_module", "_oneflow_module"]: super().__setattr__(name, value) else: # TODO: aviod memory up when set attr + _torch_module: torch.nn.Module = self._torch_module + if ( + hasattr(_torch_module, "_disable_param_update") + and _torch_module._disable_param_update + ): + return + if self._oneflow_module is not None: v = torch2oflow(value) if isinstance(v, flow.Tensor): @@ -108,7 +114,7 @@ def __setattr__(self, name: str, value: Any) -> None: obj.copy_(v) else: setattr(self._oneflow_module, name, v) - setattr(self._torch_module, name, value) + setattr(_torch_module, name, value) def extra_repr(self) -> str: return self._torch_module.extra_repr() @@ -120,7 +126,6 @@ def __init__(self, 
torch_modules, oneflow_modules): assert len(torch_modules) == len(oneflow_modules) self._torch_modules = torch_modules self._oneflow_modules = oneflow_modules - from .utils import get_mixed_dual_module dual_modules = [] for torch_module, oneflow_module in zip( @@ -152,3 +157,19 @@ def __setattr__(self, key, value): value = torch2oflow(value) setattr(self._oneflow_modules, key, value) return object.__setattr__(self, key, value) + + +def get_mixed_dual_module(module_cls): + if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: + return module_cls + + class MixedDualModule(DualModule, module_cls): + def __init__(self, torch_module, oneflow_module): + while isinstance(torch_module, DualModule): + torch_module = torch_module._torch_module + DualModule.__init__(self, torch_module, oneflow_module) + + def _get_name(self) -> str: + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedDualModule diff --git a/src/onediff/infer_compiler/utils/env_var.py b/src/onediff/infer_compiler/backends/oneflow/env_var.py similarity index 76% rename from src/onediff/infer_compiler/utils/env_var.py rename to src/onediff/infer_compiler/backends/oneflow/env_var.py index ce58d8f93..68cdef570 100644 --- a/src/onediff/infer_compiler/utils/env_var.py +++ b/src/onediff/infer_compiler/backends/oneflow/env_var.py @@ -1,35 +1,50 @@ import dataclasses import os +import torch from typing import Optional - -def parse_boolean_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - env_var = env_var.lower() - return env_var in ("1", "true", "yes", "on", "y") - - -def set_boolean_env_var(env_var: str, val: Optional[bool]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = "1" if val else "0" - - -def parse_integer_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - return int(env_var) - - -def set_integer_env_var(env_var: str, val: Optional[int]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = str(int(val)) +from onediff.utils import set_boolean_env_var, set_integer_env_var + + +@dataclasses.dataclass +class OneflowCompileOptions: + dynamic: bool = True + use_graph: bool = True + debug_level: int = -1 + max_cached_graph_size: int = 9 + graph_file: str = None + graph_file_device: torch.device = None + + # Optimization related environment variables + run_graph_by_vm: bool = None + graph_delay_variable_op_execution: bool = None + + conv_allow_half_precision_accumulation: bool = None + matmul_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_score_accumulation_max_m: int = None + attention_allow_quantization: bool = None + + mlir_cse: bool = None + mlir_enable_inference_optimization: bool = None + mlir_enable_round_trip: bool = None + mlir_fuse_forward_ops: bool = None + mlir_fuse_ops_with_backward_impl: bool = None + mlir_group_matmul: bool = None + mlir_prefer_nhwc: bool = None + mlir_fuse_kernel_launch: bool = None + + kernel_enable_cuda_graph: bool = None + kernel_enable_fused_conv_bias: bool = None + kernel_enable_fused_linear: bool = None + kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None + kernel_enable_conv2d_tuning_warmup: bool = None + kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None + kernel_conv_enable_cutlass_impl: bool = None + kernel_gemm_enable_cutlass_impl: bool = 
None + kernel_glu_enable_dual_gemm_impl: bool = None + kernel_glu_enable_y_gemm_impl: bool = None + kernel_glu_quant_enable_dual_gemm_impl: bool = None def _set_env_vars(field2env_var, options): @@ -117,17 +132,3 @@ def set_oneflow_default_env_vars(): # TODO: enable this will cause the failure of multi resolution warmup # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -def set_nexfort_env_vars(options): - field2env_var = {} - _set_env_vars(field2env_var, options) - - -def set_nexfort_default_env_vars(): - pass - - -def set_default_env_vars(): - set_oneflow_default_env_vars() - set_nexfort_default_env_vars() diff --git a/src/onediff/infer_compiler/oneflow/graph.py b/src/onediff/infer_compiler/backends/oneflow/graph.py similarity index 92% rename from src/onediff/infer_compiler/oneflow/graph.py rename to src/onediff/infer_compiler/backends/oneflow/graph.py index 34aef1663..301270832 100644 --- a/src/onediff/infer_compiler/oneflow/graph.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph.py @@ -1,9 +1,9 @@ import oneflow as flow -from ..transform.manager import transform_mgr -from ..transform.builtin_transform import reverse_proxy_class -from ..utils.log_utils import logger -from ..utils.cost_util import cost_cnt +from onediff.utils import logger +from .transform.manager import transform_mgr +from .transform.builtin_transform import reverse_proxy_class +from .utils.cost_util import cost_cnt class OneflowGraph(flow.nn.Graph): diff --git a/src/onediff/infer_compiler/utils/graph_management_utils.py b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py similarity index 92% rename from src/onediff/infer_compiler/utils/graph_management_utils.py rename to src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py index f60afbc7e..534fe69c4 100644 --- a/src/onediff/infer_compiler/utils/graph_management_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py @@ -7,11 +7,11 @@ from pathlib import Path from functools import wraps from oneflow.framework.args_tree import ArgsTree -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from .log_utils import logger -from .cost_util import cost_time -from .options import OneflowCompileOptions +from .transform.builtin_transform import torch2oflow +from .transform.manager import transform_mgr +from .utils.cost_util import cost_time +from .env_var import OneflowCompileOptions +from onediff.utils import logger def calculate_model_hash(model): @@ -57,9 +57,12 @@ def wrapper(self, *args, **kwargs): # Avoid graph file conflicts if importlib.util.find_spec("register_comfy"): from register_comfy import CrossAttntionStateDictPatch as state_patch + attn2_patch_sum = state_patch.attn2_patch_sum(input_kwargs=kwargs) if attn2_patch_sum > 0: - graph_file = graph_file.replace(".graph", f"_attn2_{attn2_patch_sum}.graph") + graph_file = graph_file.replace( + ".graph", f"_attn2_{attn2_patch_sum}.graph" + ) def process_state_dict_before_saving(state_dict: Dict): nonlocal self, args, kwargs, graph_file @@ -98,7 +101,7 @@ def handle_graph_saving(): parent_dir = os.path.dirname(graph_file) if parent_dir != "": os.makedirs(parent_dir, exist_ok=True) - + # Avoid graph file conflicts if os.path.exists(graph_file): raise FileExistsError(f"File {graph_file} exists!") diff --git a/src/onediff/infer_compiler/import_tools/__init__.py 
b/src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py diff --git a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py similarity index 98% rename from src/onediff/infer_compiler/import_tools/dyn_mock_mod.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py index 4cb5fa6fc..8ac3ae0c9 100644 --- a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py @@ -10,8 +10,8 @@ from oneflow.mock_torch import enable from oneflow.mock_torch.mock_importer import _importer from .import_module_utils import import_module_from_path -from ..utils.log_utils import logger -from ..utils.patch_for_compiler import * +from onediff.utils import logger +from .patch_for_compiler import * __all__ = ["DynamicMockModule"] diff --git a/src/onediff/infer_compiler/import_tools/format_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/format_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py diff --git a/src/onediff/infer_compiler/import_tools/import_module_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/import_module_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py diff --git a/src/onediff/infer_compiler/import_tools/importer.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py similarity index 99% rename from src/onediff/infer_compiler/import_tools/importer.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py index 0ac9ac4ba..854a7577b 100644 --- a/src/onediff/infer_compiler/import_tools/importer.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py @@ -9,7 +9,7 @@ from importlib.metadata import requires from .format_utils import MockEntityNameFormatter from .dyn_mock_mod import DynamicMockModule -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["LazyMocker", "is_need_mock"] diff --git a/src/onediff/infer_compiler/utils/patch_for_compiler.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py similarity index 97% rename from src/onediff/infer_compiler/utils/patch_for_compiler.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py index 501411d2c..8e7f7e40b 100644 --- a/src/onediff/infer_compiler/utils/patch_for_compiler.py +++ b/src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py @@ -109,13 +109,17 @@ def scaled_dot_product_attention( from oneflow import Tensor + def oneflow_rfloordiv(): - original_rfloordiv = Tensor.__rfloordiv__ + original_rfloordiv = Tensor.__rfloordiv__ + def rfloordiv(self, other): if isinstance(other, int): other = flow.tensor(other) - + return original_rfloordiv(self, other) + return rfloordiv -Tensor.__rfloordiv__ = oneflow_rfloordiv() \ No newline at end of file + +Tensor.__rfloordiv__ = oneflow_rfloordiv() diff --git a/src/onediff/infer_compiler/backends/oneflow.py b/src/onediff/infer_compiler/backends/oneflow/oneflow.py 
similarity index 79% rename from src/onediff/infer_compiler/backends/oneflow.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow.py index 71b010950..c534a3c08 100644 --- a/src/onediff/infer_compiler/backends/oneflow.py +++ b/src/onediff/infer_compiler/backends/oneflow/oneflow.py @@ -1,5 +1,6 @@ import torch -from .registry import register_backend + +from ..registry import register_backend @register_backend("oneflow") @@ -8,7 +9,7 @@ def compile(torch_module: torch.nn.Module, *, options=None): Transform a torch nn.Module to oneflow.nn.Module, then optimize it with oneflow.nn.Graph. Args: model (torch.nn.Module): Module to optimize - options (CompileOptions): Compilation options to pass to the compiler: + options (OneflowCompileOptions): Compilation options to pass to the compiler: - 'dynamic': When this is True, we will generate one graph and reuse it to avoid recompilations when input shape change. This may not always work as some operations/optimizations break the contition of reusing. When this is False, we will generate a graph for each new input shape, and will always specialize. @@ -19,31 +20,33 @@ def compile(torch_module: torch.nn.Module, *, options=None): - 'graph_file' (None) generates a compilation cache file. If the file exists, loading occurs; if not, the compilation result is saved after the first run. - 'graph_file_device' (None) sets the device for the graph file, default None. If set, the compilation result will be converted to the specified device. """ - from ..oneflow.deployable_module import OneflowDeployableModule - from ..oneflow.utils import get_mixed_deployable_module - from ..transform.custom_transform import set_default_registry - from ..utils import CompileOptions, set_oneflow_env_vars - from ..utils.param_utils import ( + from .deployable_module import OneflowDeployableModule, get_mixed_deployable_module + from .env_var import ( + set_oneflow_default_env_vars, + set_oneflow_env_vars, + OneflowCompileOptions, + ) + from .param_utils import ( state_update_hook, init_state_update_attr, forward_pre_check_and_update_state_hook, forward_generate_constant_folding_info_hook, ) + from .transform.custom_transform import set_default_registry + set_oneflow_default_env_vars() set_default_registry() - options = options if options is not None else CompileOptions() - set_oneflow_env_vars(options.oneflow) + options = options if options is not None else OneflowCompileOptions() + set_oneflow_env_vars(options) def wrap_module(module): if isinstance(module, OneflowDeployableModule): assert not module._is_raw_deployable_module - return module.__class__.from_existing( - module, options.dynamic, options.oneflow - ) + return module.__class__.from_existing(module, options.dynamic, options) else: return get_mixed_deployable_module(module.__class__)( - module, None, options.dynamic, options.oneflow + module, None, options.dynamic, options ) model = wrap_module(torch_module) diff --git a/src/onediff/infer_compiler/utils/oneflow_exec_mode.py b/src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py similarity index 100% rename from src/onediff/infer_compiler/utils/oneflow_exec_mode.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py diff --git a/src/onediff/infer_compiler/utils/online_quantization_utils.py b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py similarity index 70% rename from src/onediff/infer_compiler/utils/online_quantization_utils.py rename to 
src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py index a8fe99fd0..1a537dfc9 100644 --- a/src/onediff/infer_compiler/utils/online_quantization_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py @@ -1,11 +1,15 @@ def patch_input_adapter(in_args, in_kwargs): return in_args, in_kwargs + def online_quantize_model( - model, input_args, input_kwargs, - seed=1, inplace=True, + model, + input_args, + input_kwargs, + seed=1, + inplace=True, module_selector=lambda x: x, - quant_config = None, + quant_config=None, calibration_info=None, ): """Optimize the quantization pipeline. @@ -19,19 +23,23 @@ def online_quantize_model( OnlineQuantModule, create_quantization_calculator, ) + if getattr(quant_config, "quantization_calculator", None): calculator = quant_config.quantization_calculator else: calculator = create_quantization_calculator( - model, quant_config, module_selector, seed, + model, + quant_config, + module_selector, + seed, calibration_info=calibration_info, ) module = OnlineQuantModule(calculator, False, inplace=inplace) - in_args , in_kwargs = patch_input_adapter(input_args, input_kwargs) - quantized_model, info = module.quantize_with_calibration( - *in_args, **in_kwargs - ) + in_args, in_kwargs = patch_input_adapter(input_args, input_kwargs) + quantized_model, info = module.quantize_with_calibration(*in_args, **in_kwargs) status = module.collect_quantization_status(model, info) + for _, layer in quantized_model.named_modules(): + layer._disable_param_update = True return quantized_model, status @@ -42,14 +50,15 @@ def wrapper(self: "DeployableModule", *args, **kwargs): quant_config = self._deployable_module_quant_config if quant_config: torch_model, _ = online_quantize_model( - torch_model, args, kwargs, + torch_model, + args, + kwargs, module_selector=lambda x: x, quant_config=quant_config, inplace=True, ) - self._deployable_module_quant_config = None + self._deployable_module_quant_config = None output = func(self, *args, **kwargs) return output - return wrapper - \ No newline at end of file + return wrapper diff --git a/src/onediff/infer_compiler/utils/param_utils.py b/src/onediff/infer_compiler/backends/oneflow/param_utils.py similarity index 97% rename from src/onediff/infer_compiler/utils/param_utils.py rename to src/onediff/infer_compiler/backends/oneflow/param_utils.py index cbe71d003..c5f53440f 100644 --- a/src/onediff/infer_compiler/utils/param_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/param_utils.py @@ -3,7 +3,7 @@ import oneflow as flow from typing import List, Dict, Any, Union -from .log_utils import logger +from onediff.utils import logger def parse_device(args: List[Any], kwargs: Dict[str, Any]): @@ -80,8 +80,8 @@ def set_constant_folded_conv_attr( def generate_constant_folding_info( deployable_module, torch_module: torch.nn.Module = None ) -> Dict[str, flow.Tensor]: - removeprefix = lambda ss, prefix: ss[len(prefix):] if ss.startswith(prefix) else ss - + removeprefix = lambda ss, prefix: ss[len(prefix) :] if ss.startswith(prefix) else ss + # convert str like 'variable_transpose_model.input_blocks.10.0.in_layers.2.weight_239' # to 'input_blocks.10.0.in_layers.2.weight' def convert_var_name(s: str, prefix="variable_transpose_"): @@ -186,8 +186,9 @@ def forward_pre_check_and_update_state_hook(module, args): update_graph_with_constant_folding_info(module, constant_folding_info) setattr(module._torch_module, STATE_UPDATED_ATTR, False) + def removesuffix(s: str, suffix: str) -> str: if s.endswith(suffix): 
- return s[:len(s) - len(suffix)] + return s[: len(s) - len(suffix)] else: return s diff --git a/src/onediff/infer_compiler/transform/__init__.py b/src/onediff/infer_compiler/backends/oneflow/transform/__init__.py similarity index 100% rename from src/onediff/infer_compiler/transform/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/transform/__init__.py diff --git a/src/onediff/infer_compiler/transform/builtin_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py similarity index 98% rename from src/onediff/infer_compiler/transform/builtin_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py index 9c466a455..83a2b9dd6 100644 --- a/src/onediff/infer_compiler/transform/builtin_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py @@ -12,11 +12,12 @@ import oneflow as flow from .manager import transform_mgr -from ..utils.log_utils import logger -from ..utils.patch_for_diffusers import diffusers_checker +from onediff.utils import logger +from .patch_for_diffusers import diffusers_checker from ..import_tools.importer import is_need_mock from .patch_for_comfy import PatchForComfy + __all__ = [ "proxy_class", "ProxySubmodule", @@ -26,6 +27,7 @@ "default_converter", ] + def singledispatch_proxy(func): dispatcher = singledispatch(func) _warning_set = set() @@ -57,10 +59,12 @@ def wrapper(first_param, *args, **kwargs): def proxy_class(cls: type): try: out = transform_mgr.transform_cls(cls) - return out + return out except Exception as e: # If an exception occurs during transformation, print traceback for debugging - raise RuntimeError(f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}") + raise RuntimeError( + f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}" + ) def reverse_proxy_class(cls: type): @@ -447,7 +451,7 @@ def _(mod: types.BuiltinFunctionType, verbose=False): if mod_name is not None: m = importlib.import_module(mod_name) return getattr(m, mod.__name__) - + return default_converter(mod, verbose) diff --git a/src/onediff/infer_compiler/transform/custom_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py similarity index 96% rename from src/onediff/infer_compiler/transform/custom_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py index 0d0e71f59..feab6000f 100644 --- a/src/onediff/infer_compiler/transform/custom_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py @@ -6,7 +6,7 @@ from ..import_tools import import_module_from_path from .manager import transform_mgr from .builtin_transform import torch2oflow -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["register"] @@ -49,7 +49,7 @@ def import_module_safely(module_path, module_name): logger.warning(f"Failed to import {module_name} from {module_path}. 
{e=}") # compiler_registry_path - registry_path = Path(__file__).parents[3] / "infer_compiler_registry" + registry_path = Path(__file__).parents[5] / "infer_compiler_registry" if importlib.util.find_spec("diffusers") is not None: import_module_safely(registry_path / "register_diffusers", "register_diffusers") diff --git a/src/onediff/infer_compiler/transform/manager.py b/src/onediff/infer_compiler/backends/oneflow/transform/manager.py similarity index 99% rename from src/onediff/infer_compiler/transform/manager.py rename to src/onediff/infer_compiler/backends/oneflow/transform/manager.py index df8c65fa4..376b1e881 100644 --- a/src/onediff/infer_compiler/transform/manager.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/manager.py @@ -5,7 +5,7 @@ import logging from typing import Dict, List, Union from pathlib import Path -from ..utils.log_utils import logger +from onediff.utils import logger from ..import_tools.importer import LazyMocker __all__ = ["transform_mgr"] @@ -117,6 +117,7 @@ def transform_package(self, package_name): if importlib.util.find_spec("pydantic") is not None: import pydantic + if pydantic.VERSION < "2.5.2": logger.warning( f"Pydantic version {pydantic.VERSION} is too low, please upgrade to 2.5.2 or higher." @@ -126,5 +127,3 @@ def transform_package(self, package_name): MockEnableDisableMixin.hazard_list.append( "huggingface_hub.inference._text_generation" ) - - diff --git a/src/onediff/infer_compiler/transform/patch_for_comfy.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py similarity index 100% rename from src/onediff/infer_compiler/transform/patch_for_comfy.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py diff --git a/src/onediff/infer_compiler/utils/patch_for_diffusers.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py similarity index 95% rename from src/onediff/infer_compiler/utils/patch_for_diffusers.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py index 1de90c151..e5cb43cbf 100644 --- a/src/onediff/infer_compiler/utils/patch_for_diffusers.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py @@ -1,6 +1,6 @@ # TODO: remove this file to diffusers/src/infer_compiler_registry/register_diffusers from abc import ABC, abstractmethod -from .log_utils import logger +from onediff.utils import logger try: import diffusers diff --git a/src/onediff/infer_compiler/nexfort/__init__.py b/src/onediff/infer_compiler/backends/oneflow/utils/__init__.py similarity index 100% rename from src/onediff/infer_compiler/nexfort/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/utils/__init__.py diff --git a/src/onediff/infer_compiler/utils/cost_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py similarity index 99% rename from src/onediff/infer_compiler/utils/cost_util.py rename to src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py index 59a12a36a..4cb1575f5 100644 --- a/src/onediff/infer_compiler/utils/cost_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py @@ -2,7 +2,7 @@ import oneflow as flow import time import inspect -from .log_utils import logger +from onediff.utils import logger __all__ = ["cost_cnt", "cost_time"] diff --git a/src/onediff/infer_compiler/utils/version_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py similarity index 96% rename from src/onediff/infer_compiler/utils/version_util.py 
rename to src/onediff/infer_compiler/backends/oneflow/utils/version_util.py index 58dc6ab08..5e0d22a8e 100644 --- a/src/onediff/infer_compiler/utils/version_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py @@ -1,5 +1,5 @@ from importlib_metadata import version -from .log_utils import logger +from onediff.utils import logger def get_support_message(): diff --git a/src/onediff/infer_compiler/backends/registry.py b/src/onediff/infer_compiler/backends/registry.py index 46c1234cd..bbf0e24bf 100644 --- a/src/onediff/infer_compiler/backends/registry.py +++ b/src/onediff/infer_compiler/backends/registry.py @@ -28,23 +28,15 @@ def lookup_backend(compiler_fn): """Expand backend strings to functions""" if isinstance(compiler_fn, str): if compiler_fn not in _BACKENDS: - _lazy_import() + _lazy_import(compiler_fn) if compiler_fn not in _BACKENDS: raise RuntimeError(f"invalid backend {compiler_fn}") compiler_fn = _BACKENDS[compiler_fn] return compiler_fn -@functools.lru_cache(None) -def _lazy_import(): +def _lazy_import(backend_name): from .. import backends - def import_submodule(mod: types.ModuleType): - """ - Ensure all the files in a given submodule are imported - """ - for filename in sorted(os.listdir(os.path.dirname(cast(str, mod.__file__)))): - if filename.endswith(".py") and filename[0] != "_": - importlib.import_module(f"{mod.__name__}.{filename[:-3]}") - - import_submodule(backends) + backend_path = f"{backends.__name__}.{backend_name}" + importlib.import_module(backend_path) diff --git a/src/onediff/infer_compiler/core/__init__.py b/src/onediff/infer_compiler/core/__init__.py deleted file mode 100644 index 2c2324087..000000000 --- a/src/onediff/infer_compiler/core/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .deployable_module import DeployableModule -from .with_onediff_compile import compile, oneflow_compile diff --git a/src/onediff/infer_compiler/oneflow/__init__.py b/src/onediff/infer_compiler/oneflow/__init__.py deleted file mode 100644 index 6066ae13e..000000000 --- a/src/onediff/infer_compiler/oneflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .deployable_module import OneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/config.py b/src/onediff/infer_compiler/oneflow/config.py deleted file mode 100644 index 0e1d2f543..000000000 --- a/src/onediff/infer_compiler/oneflow/config.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -from typing import Optional -import dataclasses -from ..utils import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, -) - - -def init_default_env(): - # ONEFLOW_RUN_GRAPH_BY_VM must set here to enable nn.Graph init with vm run - os.environ.setdefault("ONEFLOW_RUN_GRAPH_BY_VM", "1") - os.environ.setdefault("ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_CSE", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_ROUND_TRIP", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_FORWARD_OPS", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", "1") - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL", "1") - os.environ.setdefault("ONEFLOW_MLIR_PREFER_NHWC", "1") - - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", "1") - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - 
os.environ.setdefault("ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", "0") - - os.environ.setdefault("ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", "-1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_QUANTIZATION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL_QUANT", "1") - os.environ.setdefault("ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP", "1") - # TODO: enable this will cause the failure of multi resolution warmup - # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -@dataclasses.dataclass -class OneFlowCompilerConfig: - run_graph_by_vm: Optional[bool] = None - graph_delay_variable_op_execution: Optional[bool] = None - - mlir_cse: Optional[bool] = None - mlir_enable_inference_optimization: Optional[bool] = None - mlir_enable_round_trip: Optional[bool] = None - mlir_fuse_forward_ops: Optional[bool] = None - mlir_fuse_ops_with_backward_impl: Optional[bool] = None - mlir_group_matmul: Optional[bool] = None - mlir_prefer_nhwc: Optional[bool] = None - mlir_fuse_kernel_launch: Optional[bool] = None - - kernel_enable_cuda_graph: Optional[bool] = None - kernel_enable_fused_conv_bias: Optional[bool] = None - kernel_enable_fused_linear: Optional[bool] = None - kernel_conv_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_conv_enable_cutlass_impl: Optional[bool] = None - kernel_gemm_enable_cutlass_impl: Optional[bool] = None - kernel_glu_enable_dual_gemm_impl: Optional[bool] = None - kernel_glu_enable_y_gemm_impl: Optional[bool] = None - kernel_glu_quant_enable_dual_gemm_impl: Optional[bool] = None - - conv_allow_half_precision_accumulation: Optional[bool] = None - matmul_allow_half_precision_accumulation: Optional[bool] = None - linear_embedding_skip_init: Optional[bool] = None - attention_allow_half_precision_accumulation: Optional[bool] = None - attention_allow_half_precision_score_accumulation_max_m: Optional[int] = None - attention_allow_quantization: Optional[bool] = None - conv2d_kernel_enable_tuning_warmup: Optional[bool] = None - - attr2env_var = { - "run_graph_by_vm": "ONEFLOW_RUN_GRAPH_BY_VM", - "graph_delay_variable_op_execution": "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", - "mlir_cse": "ONEFLOW_MLIR_CSE", - "mlir_enable_inference_optimization": "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", - "mlir_enable_round_trip": "ONEFLOW_MLIR_ENABLE_ROUND_TRIP", - "mlir_fuse_forward_ops": "ONEFLOW_MLIR_FUSE_FORWARD_OPS", - "mlir_fuse_ops_with_backward_impl": "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", - "mlir_group_matmul": "ONEFLOW_MLIR_GROUP_MATMUL", - "mlir_prefer_nhwc": "ONEFLOW_MLIR_PREFER_NHWC", - "mlir_fuse_kernel_launch": 
"ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", - "kernel_enable_cuda_graph": "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", - "kernel_enable_fused_conv_bias": "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", - "kernel_enable_fused_linear": "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", - "kernel_conv_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_gemm_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_conv_enable_cutlass_impl": "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", - "kernel_gemm_enable_cutlass_impl": "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", - "kernel_glu_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", - "kernel_glu_enable_y_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", - "kernel_glu_quant_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", - "conv_allow_half_precision_accumulation": "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", - "matmul_allow_half_precision_accumulation": "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", - "linear_embedding_skip_init": "ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", - "attention_allow_half_precision_accumulation": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", - "attention_allow_half_precision_score_accumulation_max_m": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", - "conv2d_kernel_enable_tuning_warmup":'ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP', - } - - def __post_init__(self): - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - for name in self.attr2env_var: - if fields[name].type in (bool, Optional[bool]): - super().__setattr__( - name, parse_boolean_from_env(self.attr2env_var[name]) - ) - elif fields[name].type in (int, Optional[int]): - super().__setattr__( - name, parse_integer_from_env(self.attr2env_var[name]) - ) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - super().__setattr__("_initialized", True) - - def __setattr__(self, name, value): - super().__setattr__(name, value) - if getattr(self, "_initialized", False) and name in self.attr2env_var: - fields = dataclasses.fields(self) - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - if fields[name].type in (bool, Optional[bool]): - set_boolean_env_var(self.attr2env_var[name], value) - elif fields[name].type in (int, Optional[int]): - set_integer_env_var(self.attr2env_var[name], value) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - -init_default_env() -oneflow_compiler_config = OneFlowCompilerConfig() diff --git a/src/onediff/infer_compiler/oneflow/utils.py b/src/onediff/infer_compiler/oneflow/utils.py deleted file mode 100644 index 4a5e899aa..000000000 --- a/src/onediff/infer_compiler/oneflow/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -from functools import wraps - -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from ..utils.log_utils import logger -from .dual_module import DualModule - - -@torch2oflow.register -def _(mod: DualModule, verbose=False): - return torch2oflow(mod._torch_module, verbose) - - -def handle_deployable_exception(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - if transform_mgr.debug_mode: - return func(self, *args, **kwargs) - else: - try: - return func(self, *args, **kwargs) - except Exception as e: - logger.error(f"Exception in {func.__name__}: {e=}") - logger.warning("Recompile oneflow 
module ...") - del self._deployable_module_model.oneflow_module - self._deployable_module_dpl_graph = None - return func(self, *args, **kwargs) - - return wrapper - - -def get_mixed_dual_module(module_cls): - if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: - return module_cls - - class MixedDualModule(DualModule, module_cls): - def __init__(self, torch_module, oneflow_module): - while isinstance(torch_module, DualModule): - torch_module = torch_module._torch_module - DualModule.__init__(self, torch_module, oneflow_module) - - def _get_name(self) -> str: - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedDualModule - - -# Return a OneflowDeployableModule that using module_cls as it's parent class. -def get_mixed_deployable_module(module_cls): - from .deployable_module import OneflowDeployableModule - - class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): - def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): - OneflowDeployableModule.__init__( - self, torch_module, oneflow_module, dynamic, options - ) - self._is_raw_deployable_module = False - - @classmethod - def from_existing(cls, existing_module, dynamic=True, options=None): - torch_module = existing_module._deployable_module_model._torch_module - oneflow_module = existing_module._deployable_module_model._oneflow_module - instance = cls(torch_module, oneflow_module, dynamic, options) - instance._deployable_module_dpl_graph = None - if hasattr(existing_module, "_deployable_module_dpl_graph"): - instance._deployable_module_dpl_graph = ( - existing_module._deployable_module_dpl_graph - ) - return instance - - def _get_name(self): - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedOneflowDeployableModule - - -def get_oneflow_graph(model, size=9, dynamic_graph=True): - from .graph import OneflowGraph - - g = OneflowGraph(model) - g._dynamic_input_graph_cache.set_cache_size(size) - g._dynamic_input_graph_cache.enable_shared(dynamic_graph) - return g diff --git a/src/onediff/infer_compiler/utils/__init__.py b/src/onediff/infer_compiler/utils/__init__.py deleted file mode 100644 index 076b41bcd..000000000 --- a/src/onediff/infer_compiler/utils/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from .env_var import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, - set_oneflow_env_vars, - set_oneflow_default_env_vars, - set_nexfort_env_vars, - set_nexfort_default_env_vars, - set_default_env_vars, -) -from .model_inplace_assign import TensorInplaceAssign -from .version_util import ( - get_support_message, - is_quantization_enabled, - is_community_version, -) -from .options import * diff --git a/src/onediff/infer_compiler/utils/options.py b/src/onediff/infer_compiler/utils/options.py deleted file mode 100644 index f96e83e37..000000000 --- a/src/onediff/infer_compiler/utils/options.py +++ /dev/null @@ -1,92 +0,0 @@ -import dataclasses -from typing import Dict -import torch - - -@dataclasses.dataclass -class OneflowCompileOptions: - use_graph: bool = True - debug_level: int = -1 - max_cached_graph_size: int = 9 - graph_file: str = None - graph_file_device: torch.device = None - - # Optimization related environment variables - run_graph_by_vm: bool = None - graph_delay_variable_op_execution: bool = None - - conv_allow_half_precision_accumulation: bool = None - 
matmul_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_score_accumulation_max_m: int = None - attention_allow_quantization: bool = None - - mlir_cse: bool = None - mlir_enable_inference_optimization: bool = None - mlir_enable_round_trip: bool = None - mlir_fuse_forward_ops: bool = None - mlir_fuse_ops_with_backward_impl: bool = None - mlir_group_matmul: bool = None - mlir_prefer_nhwc: bool = None - mlir_fuse_kernel_launch: bool = None - - kernel_enable_cuda_graph: bool = None - kernel_enable_fused_conv_bias: bool = None - kernel_enable_fused_linear: bool = None - kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None - kernel_enable_conv2d_tuning_warmup: bool = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None - kernel_conv_enable_cutlass_impl: bool = None - kernel_gemm_enable_cutlass_impl: bool = None - kernel_glu_enable_dual_gemm_impl: bool = None - kernel_glu_enable_y_gemm_impl: bool = None - kernel_glu_quant_enable_dual_gemm_impl: bool = None - - -@dataclasses.dataclass -class NexfortInductorCompileOptions: - disable: bool = False - mode: str = None - options: Dict = dataclasses.field(default_factory=dict) - - -@dataclasses.dataclass -class NexfortCompileOptions: - memory_format: torch.memory_format - fuse_qkv_projections: bool - inductor: NexfortInductorCompileOptions - - def __init__( - self, - memory_format=torch.channels_last, - fuse_qkv_projections=True, - inductor=None, - ): - if isinstance(memory_format, str): - memory_format = getattr(torch, memory_format) - self.memory_format = memory_format - self.fuse_qkv_projections = fuse_qkv_projections - self.inductor = ( - inductor if inductor is not None else NexfortInductorCompileOptions() - ) - - -@dataclasses.dataclass -class CompileOptions: - # common options - dynamic: bool - - # oneflow specific options - oneflow: OneflowCompileOptions - - # nexfort specific options - nexfort: NexfortCompileOptions - - def __init__(self, dynamic=True, oneflow=None, nexfort=None): - self.dynamic = dynamic - self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() - self.nexfort = nexfort if nexfort is not None else NexfortCompileOptions() - - -# a global default compile options -_GLOBAL_compile_options = CompileOptions() diff --git a/src/onediff/optimization/attention_processor.py b/src/onediff/optimization/attention_processor.py index 22650ab62..c57dcc602 100644 --- a/src/onediff/optimization/attention_processor.py +++ b/src/onediff/optimization/attention_processor.py @@ -84,7 +84,7 @@ def __call__( hidden_states = flow.bmm(attention_probs, value) hidden_states = attn.batch_to_head_dim(hidden_states) else: - from ..infer_compiler.utils import ( + from onediff.utils import ( parse_boolean_from_env, set_boolean_env_var, ) @@ -123,7 +123,7 @@ def __call__( try: - from onediff.infer_compiler.transform import register + from onediff.infer_compiler.backends.oneflow.transform import register def convert_fused_self_attn_processor( mod: FusedSelfAttnProcessor, verbose=True @@ -132,4 +132,4 @@ def convert_fused_self_attn_processor( register(torch2oflow_funcs=convert_fused_self_attn_processor) except: - print("Skip onediff.infer_compiler.transform.register") + print("Skip onediff.infer_compiler.backends.oneflow.transform.register") diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 24a104dfc..9a00b883b 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ 
b/src/onediff/optimization/quant_optimizer.py @@ -2,11 +2,13 @@ import torch import torch.nn as nn from copy import deepcopy -from ..infer_compiler.utils.log_utils import logger -from ..infer_compiler.utils.version_util import is_quantization_enabled -from ..infer_compiler.utils.cost_util import cost_cnt -from ..infer_compiler.utils.module_operations import modify_sub_module -from ..infer_compiler.transform.manager import transform_mgr +from onediff.utils import logger +from onediff.infer_compiler.backends.oneflow.utils.version_util import ( + is_quantization_enabled, +) +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.transform.manager import transform_mgr +from onediff.torch_utils.module_operations import modify_sub_module __all__ = ["quantize_model", "varify_can_use_quantization"] @@ -107,4 +109,3 @@ def apply_quantization_to_modules(quantizable_modules): ) return model - diff --git a/src/onediff/quantization/load_quantized_model.py b/src/onediff/quantization/load_quantized_model.py index 9500aa314..913466137 100644 --- a/src/onediff/quantization/load_quantized_model.py +++ b/src/onediff/quantization/load_quantized_model.py @@ -1,22 +1,30 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline -import argparse +import argparse import torch from onediff.infer_compiler import oneflow_compile + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height", type= int,default=1024) - parser.add_argument("--width", type= int, default=1024) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, default=30) parser.add_argument("--quantized_model", type=str, required=True) return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_quantized( - AutoPipelineForText2Image, args.quantized_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True + AutoPipelineForText2Image, + args.quantized_model, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe = pipe.to("cuda") diff --git a/src/onediff/quantization/quant_pipeline_test.py b/src/onediff/quantization/quant_pipeline_test.py index c68589fbc..a23efd134 100644 --- a/src/onediff/quantization/quant_pipeline_test.py +++ b/src/onediff/quantization/quant_pipeline_test.py @@ -1,15 +1,19 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline import torch -import argparse +import argparse def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--floatting_model_path", default="runwayml/stable-diffusion-v1-5") - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height",type=int, default=1024) - parser.add_argument("--width", type=int,default=1024) + parser.add_argument( + "--floatting_model_path", default="runwayml/stable-diffusion-v1-5" + ) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, 
default=30) parser.add_argument("--conv_compute_density_threshold", type=int, default=900) parser.add_argument("--linear_compute_density_threshold", type=int, default=300) @@ -20,10 +24,15 @@ def parse_args(): parser.add_argument("--quantized_model", default="./quantized_model") return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_pretrained( - AutoPipelineForText2Image, args.floatting_model_path, torch_dtype=torch.float16, variant="fp16", use_safetensors=True + AutoPipelineForText2Image, + args.floatting_model_path, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe.to("cuda") @@ -34,13 +43,15 @@ def parse_args(): num_inference_steps=args.num_inference_steps, ) -pipe.quantize(**pipe_kwargs, +pipe.quantize( + **pipe_kwargs, conv_compute_density_threshold=args.conv_compute_density_threshold, linear_compute_density_threshold=args.linear_compute_density_threshold, conv_ssim_threshold=args.conv_ssim_threshold, linear_ssim_threshold=args.linear_ssim_threshold, save_as_float=args.save_as_float, plot_calibrate_info=False, - cache_dir=args.cache_dir) + cache_dir=args.cache_dir +) pipe.save_quantized(args.quantized_model, safe_serialization=True) diff --git a/src/onediff/quantization/quantize_utils.py b/src/onediff/quantization/quantize_utils.py index 678787586..9b0b58022 100644 --- a/src/onediff/quantization/quantize_utils.py +++ b/src/onediff/quantization/quantize_utils.py @@ -17,7 +17,7 @@ def load_calibration_and_quantize_pipeline(calibration_path, pipe): store = CalibrationStorage() calibrate_info = store.load_from_file(file_path=calibration_path) - + for sub_module_name, sub_calibrate_info in calibrate_info.items(): replace_sub_module_with_quantizable_module( pipe.unet, diff --git a/src/onediff/torch_utils/__init__.py b/src/onediff/torch_utils/__init__.py new file mode 100644 index 000000000..5a82505fa --- /dev/null +++ b/src/onediff/torch_utils/__init__.py @@ -0,0 +1 @@ +from .model_inplace_assign import TensorInplaceAssign diff --git a/src/onediff/infer_compiler/utils/model_inplace_assign.py b/src/onediff/torch_utils/model_inplace_assign.py similarity index 98% rename from src/onediff/infer_compiler/utils/model_inplace_assign.py rename to src/onediff/torch_utils/model_inplace_assign.py index f61276f5b..c8edc6a6d 100644 --- a/src/onediff/infer_compiler/utils/model_inplace_assign.py +++ b/src/onediff/torch_utils/model_inplace_assign.py @@ -10,7 +10,7 @@ class TensorInplaceAssign: r""" This class is used as a context manager, instantiated with either a `torch.nn.Module` or - `onediff.infer_compiler.deployable_module.DeployableModule` during initialization. + `onediff.infer_compiler.backends.deployable_module.DeployableModule` during initialization. Within the context manager, all Tensors associated with the provided module will be transformed into AutoInplaceCopyTensor. After transformed, assignments to Tensor.data are modified to in-place copying. 
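The `TensorInplaceAssign` context manager relocated above now lives under `onediff.torch_utils`. A minimal usage sketch based on its docstring; the toy `nn.Linear` module and the zero-fill assignment below are illustrative assumptions, not part of this diff:

import torch
import torch.nn as nn

from onediff.torch_utils import TensorInplaceAssign

linear = nn.Linear(4, 4)
storage_before = linear.weight.data_ptr()

# Inside the context, assignments to Tensor.data are turned into in-place
# copies, so the parameter should keep its original storage -- useful when an
# already-compiled graph still references those buffers.
with TensorInplaceAssign(linear):
    linear.weight.data = torch.zeros(4, 4)

assert linear.weight.data_ptr() == storage_before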
diff --git a/src/onediff/infer_compiler/utils/module_operations.py b/src/onediff/torch_utils/module_operations.py similarity index 99% rename from src/onediff/infer_compiler/utils/module_operations.py rename to src/onediff/torch_utils/module_operations.py index c31856227..04cac3e58 100644 --- a/src/onediff/infer_compiler/utils/module_operations.py +++ b/src/onediff/torch_utils/module_operations.py @@ -16,7 +16,7 @@ def get_sub_module(module, sub_module_name) -> nn.Module: """ if sub_module_name == "": return module - + parts = sub_module_name.split(".") current_module = module diff --git a/src/onediff/utils/__init__.py b/src/onediff/utils/__init__.py new file mode 100644 index 000000000..631812a59 --- /dev/null +++ b/src/onediff/utils/__init__.py @@ -0,0 +1,7 @@ +from .log_utils import logger +from .env_var import ( + parse_boolean_from_env, + set_boolean_env_var, + parse_integer_from_env, + set_integer_env_var, +) diff --git a/src/onediff/utils/env_var.py b/src/onediff/utils/env_var.py new file mode 100644 index 000000000..23b6e749b --- /dev/null +++ b/src/onediff/utils/env_var.py @@ -0,0 +1,31 @@ +import os +from typing import Optional + + +def parse_boolean_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + env_var = env_var.lower() + return env_var in ("1", "true", "yes", "on", "y") + + +def set_boolean_env_var(env_var: str, val: Optional[bool]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = "1" if val else "0" + + +def parse_integer_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + return int(env_var) + + +def set_integer_env_var(env_var: str, val: Optional[int]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = str(int(val)) diff --git a/src/onediff/infer_compiler/utils/log_utils.py b/src/onediff/utils/log_utils.py similarity index 100% rename from src/onediff/infer_compiler/utils/log_utils.py rename to src/onediff/utils/log_utils.py diff --git a/tests/comfy-docker-compose.yml b/tests/comfy-docker-compose.yml index a8a7a7171..99f4d7f4d 100644 --- a/tests/comfy-docker-compose.yml +++ b/tests/comfy-docker-compose.yml @@ -33,6 +33,9 @@ services: - $HOME/test-container-cache-${CONTAINER_NAME}/dot-cache:/root/.cache - /share_nfs:/share_nfs:ro - ${PWD}/${COMFYUI_SRC_DIR}:/app/ComfyUI + - /share_nfs/hf_models/comfyui_resources/custom_nodes/ComfyUI_IPAdapter_plus:/app/ComfyUI/custom_nodes/ComfyUI_IPAdapter_plus + - /share_nfs/hf_models/comfyui_resources/input/input_image_vermeer.png:/app/ComfyUI/input/input_image_vermeer.png:ro + - /share_nfs/hf_models/comfyui_resources/input/a_car.png:/app/ComfyUI/input/a_car.png:ro - ${PWD}/onediff_comfy_nodes:/app/ComfyUI/custom_nodes/onediff_comfy_nodes - ${SDXL_BASE}:/app/ComfyUI/models/checkpoints/sd_xl_base_1.0.safetensors:ro - ${UNET_INT8}:/app/ComfyUI/models/unet_int8/unet_int8:ro diff --git a/tests/comfyui/extra_model_paths.yaml b/tests/comfyui/extra_model_paths.yaml index 88abe1cdd..007a395e9 100644 --- a/tests/comfyui/extra_model_paths.yaml +++ b/tests/comfyui/extra_model_paths.yaml @@ -9,13 +9,14 @@ comfyui: # checkpoints: /home/fengwen/workspace/test_checkpoints checkpoints: /share_nfs/hf_models/comfyui_resources/checkpoints clip: models/clip/ - clip_vision: models/clip_vision/ + clip_vision: /share_nfs/hf_models/comfyui_resources/clip_vision configs: models/configs/ controlnet: models/controlnet/ embeddings: models/embeddings/ - loras: 
models/loras/ + loras: /share_nfs/hf_models/comfyui_resources/loras/ upscale_models: models/upscale_models/ vae: models/vae/ + ipadapter: /share_nfs/hf_models/comfyui_resources/ipadapter #other_ui: # base_path: path/to/ui diff --git a/tests/comfyui/test_by_ui.py b/tests/comfyui/test_by_ui.py index 6b8253222..9ee92836a 100644 --- a/tests/comfyui/test_by_ui.py +++ b/tests/comfyui/test_by_ui.py @@ -174,9 +174,14 @@ def launch_prompt(driver): print(f"launch the queue prompt (timeout: {args.timeout}s) ...") launch_and_wait(driver, timeout=args.timeout) + + duration = time.time() - start_time print( - f"{args.workflow} has finished, time elapsed: {time.time() - start_time:.1f}" + f"{args.workflow} has finished, time elapsed: {duration:.1f}" ) + + if duration < 2: + raise ValueError("Execution duration is too short, possible error in workflow execution") print(f"check if error occurs...") check_error_occurs(driver) diff --git a/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json b/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json index 55694121d..1af8e9adb 100644 --- a/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json +++ b/tests/comfyui/workflows/sdxl-unet-speedup-graph-saver.json @@ -40,44 +40,6 @@ "text, watermark" ] }, - { - "id": 6, - "type": "CLIPTextEncode", - "pos": [ - 515, - 318 - ], - "size": { - "0": 422.84503173828125, - "1": 164.31304931640625 - }, - "flags": {}, - "order": 3, - "mode": 0, - "inputs": [ - { - "name": "clip", - "type": "CLIP", - "link": 3 - } - ], - "outputs": [ - { - "name": "CONDITIONING", - "type": "CONDITIONING", - "links": [ - 4 - ], - "slot_index": 0 - } - ], - "properties": { - "Node name for S&R": "CLIPTextEncode" - }, - "widgets_values": [ - "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," - ] - }, { "id": 3, "type": "KSampler", @@ -129,7 +91,7 @@ "Node name for S&R": "KSampler" }, "widgets_values": [ - 156680208700286, + 371953008319175, "randomize", 20, 8, @@ -179,51 +141,29 @@ } }, { - "id": 4, - "type": "CheckpointLoaderSimple", + "id": 9, + "type": "SaveImage", "pos": [ - 100, + 1765.2780151367188, 130 ], "size": { - "0": 315, - "1": 98 + "0": 241.92205810546875, + "1": 270 }, "flags": {}, - "order": 0, + "order": 8, "mode": 0, - "outputs": [ - { - "name": "MODEL", - "type": "MODEL", - "links": [ - 10 - ], - "slot_index": 0 - }, - { - "name": "CLIP", - "type": "CLIP", - "links": [ - 3, - 5 - ], - "slot_index": 1 - }, + "inputs": [ { - "name": "VAE", - "type": "VAE", - "links": [ - 8 - ], - "slot_index": 2 + "name": "images", + "type": "IMAGE", + "link": 9 } ], - "properties": { - "Node name for S&R": "CheckpointLoaderSimple" - }, + "properties": {}, "widgets_values": [ - "sd_xl_base_1.0.safetensors" + "model-speedup" ] }, { @@ -238,7 +178,7 @@ "1": 106 }, "flags": {}, - "order": 1, + "order": 0, "mode": 0, "outputs": [ { @@ -254,47 +194,55 @@ "Node name for S&R": "EmptyLatentImage" }, "widgets_values": [ - 1024, - 1024, + 512, + 512, 1 ] }, { - "id": 9, - "type": "SaveImage", + "id": 12, + "type": "ModelGraphSaver", "pos": [ - 1765.2780151367188, - 130 + 1457, + 353 ], "size": { - "0": 241.92205810546875, - "1": 58 + "0": 315, + "1": 78 }, "flags": {}, - "order": 8, + "order": 7, "mode": 0, "inputs": [ { - "name": "images", - "type": "IMAGE", - "link": 9 + "name": "samples", + "type": "LATENT", + "link": 13, + "slot_index": 0 + }, + { + "name": "model", + "type": "MODEL", + "link": 14 } ], - "properties": {}, + "properties": { + "Node name for S&R": "ModelGraphSaver" + }, "widgets_values": [ - 
"model-speedup" + "sd1.5-unet" ] }, { "id": 10, "type": "ModelSpeedup", "pos": [ - 515, - 130 + 482, + 133 ], "size": { "0": 315, - "1": 58 + "1": 78 }, "flags": {}, "order": 2, @@ -305,6 +253,11 @@ "type": "MODEL", "link": 10, "slot_index": 0 + }, + { + "name": "custom_booster", + "type": "CUSTOM_BOOSTER", + "link": null } ], "outputs": [ @@ -323,41 +276,93 @@ "Node name for S&R": "ModelSpeedup" }, "widgets_values": [ - "enable" + true ] }, { - "id": 12, - "type": "ModelGraphSaver", + "id": 6, + "type": "CLIPTextEncode", "pos": [ - 1457, - 353 + 564, + 357 ], "size": { - "0": 315, - "1": 78 + "0": 422.84503173828125, + "1": 164.31304931640625 }, "flags": {}, - "order": 7, + "order": 3, "mode": 0, "inputs": [ { - "name": "samples", - "type": "LATENT", - "link": 13, + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 4 + ], "slot_index": 0 - }, + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 100, + 130 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ { - "name": "model", + "name": "MODEL", "type": "MODEL", - "link": 14 + "links": [ + 10 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 8 + ], + "slot_index": 2 } ], "properties": { - "Node name for S&R": "ModelGraphSaver" + "Node name for S&R": "CheckpointLoaderSimple" }, "widgets_values": [ - "sdxl-unet" + "sd_xl_base_1.0.safetensors" ] } ], @@ -461,6 +466,14 @@ ], "groups": [], "config": {}, - "extra": {}, + "extra": { + "ds": { + "scale": 0.8390545288824038, + "offset": { + "0": -249.2752477861829, + "1": 56.895086690219436 + } + } + }, "version": 0.4 } \ No newline at end of file diff --git a/tests/convert_torch_to_of/test_patch_for_compiling.py b/tests/convert_torch_to_of/test_patch_for_compiling.py index 6df8ac758..21844a4fa 100644 --- a/tests/convert_torch_to_of/test_patch_for_compiling.py +++ b/tests/convert_torch_to_of/test_patch_for_compiling.py @@ -6,7 +6,7 @@ """ import pytest import numpy as np -from onediff.infer_compiler.utils.patch_for_compiler import FakeCuda +from onediff.infer_compiler.backends.oneflow.import_tools.patch_for_compiler import FakeCuda @pytest.mark.parametrize("batch_size", [8]) diff --git a/tests/convert_torch_to_of/test_torch2of_demo.py b/tests/convert_torch_to_of/test_torch2of_demo.py index eabb63f1e..df4eb5202 100644 --- a/tests/convert_torch_to_of/test_torch2of_demo.py +++ b/tests/convert_torch_to_of/test_torch2of_demo.py @@ -9,7 +9,7 @@ import unittest import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr class PyTorchModel(torch.nn.Module): diff --git a/tests/sd-webui/cat.png b/tests/sd-webui/cat.png new file mode 100644 index 000000000..e610e7081 Binary files /dev/null and b/tests/sd-webui/cat.png differ diff --git a/tests/sd-webui/test_api.py b/tests/sd-webui/test_api.py new file mode 100644 index 000000000..21c1f0719 --- /dev/null +++ b/tests/sd-webui/test_api.py @@ -0,0 +1,95 @@ +import base64 +import pytest +import requests +from pathlib import Path + +def encode_file_to_base64(path): + with 
open(path, "rb") as file: + return base64.b64encode(file.read()).decode("utf-8") + +def post_request(url, data): + response = requests.post(url, json=data) + assert response.status_code == 200 + return response + +@pytest.fixture() +def base_url(): + return f"http://127.0.0.1:7860" + +@pytest.fixture() +def url_txt2img(base_url): + return f"{base_url}/sdapi/v1/txt2img" + +@pytest.fixture() +def url_img2img(base_url): + return f"{base_url}/sdapi/v1/img2img" + +@pytest.fixture() +def url_set_config(base_url): + return f"{base_url}/sdapi/v1/options" + +@pytest.fixture() +def simple_txt2img_request(): + return { + "prompt": "1girl", + "negative_prompt": "", + "seed": 1, + "steps": 20, + "width": 1024, + "height": 1024, + "cfg_scale": 7, + "n_iter": 1, + "batch_size": 1, + + # Enable OneDiff speed up + "script_name": "onediff_diffusion_model", + + "script_args" : [ + False, # quantization + None, # graph_checkpoint + "", # saved_graph_name + ], + } + +def test_txt2img_onediff(url_txt2img, simple_txt2img_request): + data = simple_txt2img_request + post_request(url_txt2img, data) + +def test_img2img_onediff(url_img2img, simple_txt2img_request): + img_path = str(Path(__file__).parent / "cat.png") + init_images = {"init_images": [encode_file_to_base64(img_path)]} + data = {**simple_txt2img_request, **init_images} + post_request(url_img2img, data) + +def test_txt2img_onediff_quant(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + True, # quantization + None, # graph_checkpoint + "saved_graph", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) + +def test_txt2img_onediff_save_graph(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + False, # quantization + None, # graph_checkpoint + "saved_graph", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) + +def test_txt2img_onediff_load_graph(url_txt2img, simple_txt2img_request): + script_args = { + "script_args": [ + False, # quantization + "saved_graph", # graph_checkpoint + "", # saved_graph_name + ] + } + data = {**simple_txt2img_request, **script_args} + post_request(url_txt2img, data) diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py index 96e686b2a..28a711404 100644 --- a/tests/test_dual_module_list.py +++ b/tests/test_dual_module_list.py @@ -1,6 +1,6 @@ import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import torch import torch.nn as nn import oneflow as flow @@ -39,7 +39,7 @@ def forward(self, x): assert np.allclose(y_torch.detach().cpu(), y_oneflow.detach().cpu(), 1e-03, 1e-03) -from onediff.infer_compiler.oneflow.dual_module import DualModule, DualModuleList +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule, DualModuleList assert isinstance(m.linears, DualModuleList) diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py index 36d92f9fc..00a2fbce5 100644 --- a/tests/test_quantize_custom_model.py +++ b/tests/test_quantize_custom_model.py @@ -7,8 +7,8 @@ from torch import nn from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.transform import register +from 
diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py
index 96e686b2a..28a711404 100644
--- a/tests/test_dual_module_list.py
+++ b/tests/test_dual_module_list.py
@@ -1,6 +1,6 @@
 import numpy as np
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import register
+from onediff.infer_compiler.backends.oneflow.transform import register
 import torch
 import torch.nn as nn
 import oneflow as flow
@@ -39,7 +39,7 @@ def forward(self, x):
 
 assert np.allclose(y_torch.detach().cpu(), y_oneflow.detach().cpu(), 1e-03, 1e-03)
 
-from onediff.infer_compiler.oneflow.dual_module import DualModule, DualModuleList
+from onediff.infer_compiler.backends.oneflow.dual_module import DualModule, DualModuleList
 
 assert isinstance(m.linears, DualModuleList)
diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py
index 36d92f9fc..00a2fbce5 100644
--- a/tests/test_quantize_custom_model.py
+++ b/tests/test_quantize_custom_model.py
@@ -7,8 +7,8 @@
 from torch import nn
 
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.transform import register
-from onediff.infer_compiler.utils import is_community_version
+from onediff.infer_compiler.backends.oneflow.transform import register
+from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version
 
 is_community = is_community_version()
 onediff_quant_spec = importlib.util.find_spec("onediff_quant")
diff --git a/tests/webui-docker-compose.yml b/tests/webui-docker-compose.yml
new file mode 100644
index 000000000..6486726fb
--- /dev/null
+++ b/tests/webui-docker-compose.yml
@@ -0,0 +1,56 @@
+version: "3.8"
+
+services:
+  onediff-test:
+    container_name: ${CONTAINER_NAME}
+    image: ${ACR_ORG}/${MATRIX_IMAGE}
+    command: sleep 5400
+    privileged: true
+    shm_size: 8g
+    network_mode: host
+    pids_limit: 2000
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp=unconfined
+    environment:
+      HF_HUB_OFFLINE: "1"
+      ONEFLOW_MLIR_ENABLE_TIMING: "1"
+      ONEFLOW_MLIR_PRINT_STATS: "1"
+      CI: "1"
+      SILICON_ONEDIFF_LICENSE_KEY: ${SILICON_ONEDIFF_LICENSE_KEY}
+
+      INDEX_URL: "https://pypi.tuna.tsinghua.edu.cn/simple"
+      CLIP_PACKAGE: "git+file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/CLIP"
+      OPENCLIP_PACKAGE: "git+file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/open_clip"
+      ASSETS_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/stable-diffusion-webui-assets"
+      STABLE_DIFFUSION_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/stable-diffusion-stability-ai"
+      STABLE_DIFFUSION_XL_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/generative-models"
+      K_DIFFUSION_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/k-diffusion"
+      BLIP_REPO: "file:///app/${WEBUI_SRC_DIR}/${WEBUI_DEPENDENCIES_SUBDIR}/BLIP"
+
+      ASSETS_COMMIT_HASH: ${ASSETS_COMMIT_HASH}
+      STABLE_DIFFUSION_COMMIT_HASH: ${STABLE_DIFFUSION_COMMIT_HASH}
+      STABLE_DIFFUSION_XL_COMMIT_HASH: ${STABLE_DIFFUSION_XL_COMMIT_HASH}
+      K_DIFFUSION_COMMIT_HASH: ${K_DIFFUSION_COMMIT_HASH}
+      BLIP_COMMIT_HASH: ${BLIP_COMMIT_HASH}
+
+      SAFE_DIRECTORIES: |
+        CLIP
+        open_clip
+        stable-diffusion-webui-assets
+        stable-diffusion-stability-ai
+        generative-models
+        k-diffusion
+        BLIP
+
+
+    volumes:
+      - $HOME/test-container-cache-${CONTAINER_NAME}/dot-local:/root/.local
+      - $HOME/test-container-cache-${CONTAINER_NAME}/dot-cache:/root/.cache
+      - /share_nfs:/share_nfs:ro
+      - ${PWD}/${WEBUI_SRC_DIR}:/app/${WEBUI_SRC_DIR}
+      - ${PWD}/onediff_sd_webui_extensions:/app/${WEBUI_SRC_DIR}/extensions/onediff_sd_webui_extensions
+      - $PWD:/src/onediff
+    working_dir: /src/onediff
+    restart: "no"
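The *_REPO and *_COMMIT_HASH variables exist because webui's launcher resolves each vendored dependency from the environment before falling back to GitHub, so the locally packed checkouts are used instead of network clones (HF_HUB_OFFLINE serves the same goal for model downloads). A rough sketch of that resolution step (the real logic lives in webui's modules/launch_utils.py; names here are approximate, not an exact API):

```python
import os

def resolve_repo(env_key: str, default_url: str) -> str:
    # CI override, e.g. STABLE_DIFFUSION_REPO=file:///app/.../stable-diffusion-stability-ai
    return os.environ.get(env_key, default_url)

url = resolve_repo(
    "STABLE_DIFFUSION_REPO",
    "https://github.com/Stability-AI/stablediffusion.git",
)
commit = os.environ.get("STABLE_DIFFUSION_COMMIT_HASH")  # pin checked out after clone
print(f"would fetch {url} at {commit or 'default branch'}")
```

SAFE_DIRECTORIES is a custom list consumed inside the test container, presumably to register each vendored checkout with git's safe.directory mechanism, since the repos are owned by a different user inside the container than the one that packed them.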