diff --git a/intel_extension_for_pytorch/__init__.py b/intel_extension_for_pytorch/__init__.py index 27ebb7678..17bcddca0 100644 --- a/intel_extension_for_pytorch/__init__.py +++ b/intel_extension_for_pytorch/__init__.py @@ -2,7 +2,7 @@ import re import torch -import warnings + try: import torchvision @@ -128,12 +128,14 @@ from . import _dynamo from . import _meta_registrations from ._init_on_device import OnDevice +from .utils._logger import logger, WarningType try: from .cpu import tpp except BaseException: - warnings.warn( - "Please install transformers repo when you want to use fast_bert API." + logger.warn( + "Please install transformers repo when you want to use fast_bert API.", + _type=WarningType.MissingArgument, ) from .frontend import optimize diff --git a/intel_extension_for_pytorch/_inductor/compiler.py b/intel_extension_for_pytorch/_inductor/compiler.py index 3b444099e..37776aad0 100644 --- a/intel_extension_for_pytorch/_inductor/compiler.py +++ b/intel_extension_for_pytorch/_inductor/compiler.py @@ -2,9 +2,8 @@ from torch._subclasses import FakeTensor from torch.utils._mode_utils import no_dispatch import builtins -import warnings from typing import Callable, Dict, Optional, Union, List - +from ..utils._logger import logger, WarningType _compiler_backend = "inductor" @@ -66,7 +65,10 @@ def defake(x): traced_model = torch.jit.freeze(traced_model) return traced_model except Exception: - warnings.warn("JIT trace failed during the IPEX compile process.") + logger.warning( + "JIT trace failed during the IPEX compile process.", + _type=WarningType.NotSupported, + ) return model else: raise RuntimeError( diff --git a/intel_extension_for_pytorch/_inductor/decomposition.py b/intel_extension_for_pytorch/_inductor/decomposition.py index 6b41a474a..02367556b 100644 --- a/intel_extension_for_pytorch/_inductor/decomposition.py +++ b/intel_extension_for_pytorch/_inductor/decomposition.py @@ -1,14 +1,14 @@ -import logging import torch._decomp as decomp -log = logging.getLogger(__name__) +from ..utils._logger import logger + decomposition_overrides = {} def register_decomposition(ops): for op in [ops] if callable(ops) else ops: if op in decomposition_overrides: - log.warning(f"duplicate decomp: {ops}") + logger.warning(f"duplicate decomp: {ops}") return decomp.register_decomposition(ops, decomposition_overrides) diff --git a/intel_extension_for_pytorch/cpu/auto_ipex.py b/intel_extension_for_pytorch/cpu/auto_ipex.py index f41191390..63469bf1c 100644 --- a/intel_extension_for_pytorch/cpu/auto_ipex.py +++ b/intel_extension_for_pytorch/cpu/auto_ipex.py @@ -1,17 +1,13 @@ import os import platform import glob -import logging +from ..utils._logger import logger, WarningType import sys from argparse import ArgumentParser, REMAINDER from argparse import RawTextHelpFormatter from tempfile import mkstemp import uuid -format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" -logging.basicConfig(level=logging.INFO, format=format_str) -logger = logging.getLogger(__name__) - def apply_monkey_patch(program, dtype, auto_ipex_verbose, disable_ipex_graph_mode): # Auto apply the ipex features @@ -213,7 +209,9 @@ def main(): lst_valid.append(item) else: logger.warning( - "{} doesn't exist. Removing it from LD_PRELOAD.".format(item) + f"You have set {item} into LD_PRELOAD but it doesn't exist. Removing it from LD_PRELOAD." 
+ + "please install it if you want it or remove it from LD_PRELOAD if you don't", + _type=WarningType.MissingDependency, ) if len(lst_valid) > 0: os.environ["LD_PRELOAD"] = ":".join(lst_valid) diff --git a/intel_extension_for_pytorch/cpu/graph_capture.py b/intel_extension_for_pytorch/cpu/graph_capture.py index 7e0fa0e11..191495549 100644 --- a/intel_extension_for_pytorch/cpu/graph_capture.py +++ b/intel_extension_for_pytorch/cpu/graph_capture.py @@ -7,9 +7,9 @@ from typing import List import functools -import logging import threading import warnings +from ..utils._logger import logger, WarningType class RunMethods(IntEnum): @@ -37,7 +37,10 @@ def compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]): traced_model = torch.jit.freeze(traced_model) return traced_model except Exception: - warnings.warn("JIT trace failed during the 'compiler' process.") + logger.warning( + "JIT trace failed during the 'compiler' process.", + _type=WarningType.NotSupported, + ) return gm @functools.wraps(func) @@ -62,8 +65,9 @@ def forward(*input, **kwargs): else: return self.model(*input, **kwargs) if self.train: - warnings.warn( - "graph capture does not support training yet." + logger.warning( + "graph capture does not support training yet.", + _type=WarningType.NotSupported, ) self.method = RunMethods.EagerTrain return func(*input, **kwargs) @@ -89,7 +93,7 @@ def forward(*input, **kwargs): output = traced_model(*input, **kwargs) self.model = traced_model self.method = RunMethods.JIT - logging.debug("generate graph by JIT trace.") + logger.debug("generate graph by JIT trace.") return output except BaseException: try: @@ -101,11 +105,12 @@ def forward(*input, **kwargs): output = dynamo_model(*input, **kwargs) self.model = dynamo_model self.method = RunMethods.TorchDynamo - logging.debug("generate graph by TorchDynamo.") + logger.debug("generate graph by TorchDynamo.") return output except BaseException: - warnings.warn( - "Both JIT and TorchDynamo failed, fallback to original model." + logger.warning( + "Both JIT and TorchDynamo failed, fallback to original model.", + _type=WarningType.NotSupported, ) self.method = RunMethods.EagerInfer torch._dynamo.reset() diff --git a/intel_extension_for_pytorch/cpu/hypertune/objective.py b/intel_extension_for_pytorch/cpu/hypertune/objective.py index a39263fac..4304acfa4 100644 --- a/intel_extension_for_pytorch/cpu/hypertune/objective.py +++ b/intel_extension_for_pytorch/cpu/hypertune/objective.py @@ -1,6 +1,7 @@ # reference: https://github.com/intel/neural-compressor/blob/\ # 15477100cef756e430c8ef8ef79729f0c80c8ce6/neural_compressor/objective.py import subprocess +from ...utils._logger import logger, WarningType class MultiObjective(object): @@ -39,7 +40,10 @@ def deprecate_config(self, cfg, deprecated, new, default): ), f"Configurations {deprecated} and {new} cannot be set at the same time." 
ret = default if v_deprecated != default: - print(f"[**Warning**] Configuration {deprecated} is deprecated by {new}.") + logger.warning( + f"Configuration {deprecated} is deprecated by {new}.", + _type=WarningType.DeprecatedArgument, + ) ret = v_deprecated if v_new != default: ret = v_new diff --git a/intel_extension_for_pytorch/cpu/launch/cpu_info.py b/intel_extension_for_pytorch/cpu/launch/cpu_info.py index 6e51e7de0..b90011bee 100644 --- a/intel_extension_for_pytorch/cpu/launch/cpu_info.py +++ b/intel_extension_for_pytorch/cpu/launch/cpu_info.py @@ -3,6 +3,7 @@ import platform import re import subprocess +from ...utils._logger import WarningType # lscpu Examples # # The following is the parsable format, which can be fed to other @@ -206,7 +207,7 @@ def __init__(self, logger=None, lscpu_txt=""): if c.maxmhz in e_core_mhzs: c.is_p_core = False - def verbose(self, level, msg): + def verbose(self, level, msg, warning_type=None): if self.logger: logging_fn = { "warning": self.logger.warning, @@ -215,7 +216,7 @@ def verbose(self, level, msg): assert ( level in logging_fn.keys() ), f"Unrecognized logging level {level} is detected. Available levels are {logging_fn.keys()}." - logging_fn[level](msg) + logging_fn[level](msg, _type=warning_type) else: print(msg) @@ -264,12 +265,18 @@ def gen_pools_ondemand( if use_logical_cores: self.verbose( "warning", - "Argument --use-logical-cores won't take effect when --cores-list is set.", + "Argument --use-logical-cores won't take effect when --cores-list is set. " + + "Please see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/launch_script.html#launch-script-usage-guide" # noqa: B950 + + " for the usage guide.", + warning_type=WarningType.AmbiguousArgument, ) if use_e_cores: self.verbose( "warning", - "Argument --use-e-cores won't take effect when --cores-list is set.", + "Argument --use-e-cores won't take effect when --cores-list is set. " + + "Please see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/launch_script.html#launch-script-usage-guide" # noqa: B950 + + " for the usage guide.", + warning_type=WarningType.AmbiguousArgument, ) pool = [c for c in self.pool_all if c.cpu in cores_list] nodes = list(set([c.node for c in pool])) @@ -284,6 +291,9 @@ def gen_pools_ondemand( self.verbose( "warning", - "Argument --skip-cross-node-cores cannot take effect on the designated cores. Disabled.", + "Argument --skip-cross-node-cores cannot take effect on the designated cores. Disabled. " + + "Please see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/launch_script.html#launch-script-usage-guide" # noqa: B950 + + " for the usage guide.", + warning_type=WarningType.WrongArgument, ) break else: @@ -302,7 +312,7 @@ def gen_pools_ondemand( e_cores = [c.cpu for c in pool if not c.is_p_core] if len(e_cores) > 0: self.verbose( - "warning", + "info", f"Efficient-Cores are detected ({e_cores}). Disabled for performance consideration. \ You can enable them with argument --use-e-cores.", ) @@ -348,8 +358,11 @@ def gen_pools_ondemand( if skip_cross_node_cores: self.verbose( "warning", - "Argument --skip-cross-node-cores won't take effect when both --ninstances and \ - --ncores-per-instance are explicitly set.", + "Argument --skip-cross-node-cores won't take effect when both --ninstances and" + + " --ncores-per-instance are explicitly set. "
+ + "please see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/launch_script.html#launch-script-usage-guide" # noqa: B950 + + "for usage guide", + warning_type=WarningType.AmbiguousArgument, ) assert ( ninstances * ncores_per_instance > 0 diff --git a/intel_extension_for_pytorch/cpu/launch/launch.py b/intel_extension_for_pytorch/cpu/launch/launch.py index 1fde11e3d..00564198b 100644 --- a/intel_extension_for_pytorch/cpu/launch/launch.py +++ b/intel_extension_for_pytorch/cpu/launch/launch.py @@ -8,6 +8,7 @@ import intel_extension_for_pytorch.cpu.auto_ipex as auto_ipex from .launcher_distributed import DistributedTrainingLauncher from .launcher_multi_instances import MultiInstancesLauncher +from ...utils._logger import logger, WarningType """ This is a script for launching PyTorch training and inference on Intel Xeon CPU with optimal configurations. @@ -220,53 +221,72 @@ def add_deprecated_params(parser): def process_deprecated_params(args, logger): if args.nproc_per_node != -1: - logger.warning("Argument --nproc_per_node is deprecated by --nprocs-per-node.") + logger.warning( + "Argument --nproc_per_node is deprecated by --nprocs-per-node.", + _type=WarningType.DeprecatedArgument, + ) args.nprocs_per_node = args.nproc_per_node if args.more_mpi_params != "": logger.warning( - "Argument --more_mpi_params is deprecated by --extra-mpi-params." + "Argument --more_mpi_params is deprecated by --extra-mpi-params.", + _type=WarningType.DeprecatedArgument, ) args.extra_mpi_params = args.more_mpi_params if args.ncore_per_instance != -1: logger.warning( - "Argument --ncore_per_instance is deprecated by --ncores-per-instance." + "Argument --ncore_per_instance is deprecated by --ncores-per-instance.", + _type=WarningType.DeprecatedArgument, ) args.ncores_per_instance = args.ncore_per_instance if args.node_id != -1: - logger.warning("Argument --node_id is deprecated by --nodes-list.") + logger.warning( + "Argument --node_id is deprecated by --nodes-list.", + _type=WarningType.DeprecatedArgument, + ) args.nodes_list = str(args.node_id) if args.core_list != "": - logger.warning("Argument --core_list is deprecated by --cores-list.") + logger.warning( + "Argument --core_list is deprecated by --cores-list.", + _type=WarningType.DeprecatedArgument, + ) args.cores_list = args.core_list if args.logical_core_for_ccl: logger.warning( - "Argument --logical_core_for_ccl is deprecated by --logical-cores-for-ccl." + "Argument --logical_core_for_ccl is deprecated by --logical-cores-for-ccl.", + _type=WarningType.DeprecatedArgument, ) args.logical_cores_for_ccl = args.logical_core_for_ccl if args.use_logical_core: logger.warning( - "Argument --use_logical_core is deprecated by --use-logical-cores." + "Argument --use_logical_core is deprecated by --use-logical-cores.", + _type=WarningType.DeprecatedArgument, ) args.use_logical_cores = args.use_logical_core if args.log_path != "": - logger.warning("Argument --log_path is deprecated by --log-dir.") + logger.warning( + "Argument --log_path is deprecated by --log-dir.", + _type=WarningType.DeprecatedArgument, + ) args.log_dir = args.log_path if args.multi_instance: logger.info( - "Argument --multi_instance is deprecated. Will be removed. \ - If you are using the deprecated argument, please update it to the new one." + "Argument --multi_instance is deprecated. Will be removed." 
+ + "If you are using the deprecated argument, please update it to the new one.", + _type=WarningType.DeprecatedArgument, ) if args.distributed: logger.info( - "Argument --distributed is deprecated. Will be removed. \ - If you are using the deprecated argument, please update it to the new one." + "Argument --distributed is deprecated. Will be removed." + + "If you are using the deprecated argument, please update it to the new one.", + _type=WarningType.DeprecatedArgument, ) if args.enable_tcmalloc or args.enable_jemalloc or args.use_default_allocator: logger.warning( - "Arguments --enable_tcmalloc, --enable_jemalloc and --use_default_allocator \ - are deprecated by --memory-allocator." + "Arguments --enable_tcmalloc, --enable_jemalloc and --use_default_allocator" + + "are deprecated by --memory-allocator tcmalloc/jemalloc/auto.", + _type=WarningType.DeprecatedArgument, ) if args.use_default_allocator: args.memory_allocator = "default" @@ -276,16 +296,21 @@ def process_deprecated_params(args, logger): args.memory_allocator = "tcmalloc" if args.disable_numactl: logger.warning( - "Argument --disable_numactl is deprecated by --multi-task-manager." + "Argument --disable_numactl is deprecated by --multi-task-manager taskset.", + _type=WarningType.DeprecatedArgument, ) args.multi_task_manager = "taskset" if args.disable_taskset: logger.warning( - "Argument --disable_taskset is deprecated by --multi-task-manager." + "Argument --disable_taskset is deprecated by --multi-task-manager numactl.", + _type=WarningType.DeprecatedArgument, ) args.multi_task_manager = "numactl" if args.disable_iomp: - logger.warning("Argument --disable_iomp is deprecated by --omp-runtime.") + logger.warning( + "Argument --disable_iomp is deprecated by --omp-runtime default.", + _type=WarningType.DeprecatedArgument, + ) args.omp_runtime = "default" @@ -383,10 +408,6 @@ def run_main_with_args(args): if platform.system() == "Windows": raise RuntimeError("Windows platform is not supported!!!") - format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - logging.basicConfig(level=logging.INFO, format=format_str) - logger = logging.getLogger(__name__) - launcher_distributed = DistributedTrainingLauncher(logger) launcher_multi_instances = MultiInstancesLauncher(logger) diff --git a/intel_extension_for_pytorch/cpu/launch/launcher_base.py b/intel_extension_for_pytorch/cpu/launch/launcher_base.py index 933df0084..0b06cafef 100644 --- a/intel_extension_for_pytorch/cpu/launch/launcher_base.py +++ b/intel_extension_for_pytorch/cpu/launch/launcher_base.py @@ -2,6 +2,7 @@ from os.path import expanduser import glob from .cpu_info import CPUPoolList +from ...utils._logger import WarningType class Launcher: @@ -75,7 +76,7 @@ def add_common_params(self, parser): help=f"Choose which OpenMP runtime to run the workloads with. Supported choices are {self.omp_supported}.", ) - def verbose(self, level, msg): + def verbose(self, level, msg, warning_type=None): if self.logger: logging_fn = { "warning": self.logger.warning, @@ -84,7 +85,7 @@ def verbose(self, level, msg): assert ( level in logging_fn.keys() ), f"Unrecognized logging level {level} is detected. Available levels are {logging_fn.keys()}." - logging_fn[level](msg) + logging_fn[level](msg, _type=warning_type) else: print(msg) @@ -118,8 +119,9 @@ def add_env(self, env_name, env_value): if value != "" and value != env_value: self.verbose( "warning", - f"{env_name} in environment variable is {os.environ[env_name]} while the value you would like to set \ - is {env_value}. 
Use the exsiting value.", + f"{env_name} in environment variable is {os.environ[env_name]} while the value you would like to set" + + f" is {env_value}. Using the existing value. Please unset {env_name} if you want the ipex launcher to set it.", + warning_type=WarningType.AmbiguousArgument, ) self.environ_set[env_name] = os.environ[env_name] else: @@ -165,17 +167,20 @@ def set_lib_bin_from_list( name_local = supported[0] self.verbose( "warning", - f"Designated {category} '{name_input}' is unknown. Changing it to '{name_local}'. \ - Supported {category} are {supported}.", + f"Designated {category} '{name_input}' is unknown. Changing it to '{name_local}'." + + f" Supported {category} are {supported}.", + warning_type=WarningType.WrongArgument, ) if name_local in skip_list: name_local = supported[0] self.verbose( "warning", - f"Designated {category} '{name_input}' is not applicable at this moment. Changing it to '{name_local}'\ - . Please choose another {category} from {supported}.", + f"Designated {category} '{name_input}' is not applicable at this moment. Changing it to '{name_local}'." + + f" Please choose another {category} from {supported}.", + warning_type=WarningType.WrongArgument, ) if name_local == supported[0]: + self.verbose("info", f"Auto choosing the {category}...") for name in supported[2:]: if name in skip_list: continue @@ -193,7 +198,7 @@ def set_lib_bin_from_list( msg = f"Neither of {supported[2:]} {category} is found" else: msg = f"None of {supported[2:]} {category} is found" - self.verbose("warning", f"{msg} in {self.library_paths}.") + self.verbose("info", f"{msg} in {self.library_paths}.") if extra_warning_msg_with_default_choice != "": extra_warning_msg_with_default_choice = ( f" {extra_warning_msg_with_default_choice}" @@ -211,8 +216,9 @@ def set_lib_bin_from_list( ) self.verbose( "warning", - f"Unable to find the '{name_local}' {category} library file in {self.library_paths}.\ - {extra_warning_msg_install_guide}", + f"Unable to find the '{name_local}' {category} library file in " + + f"{self.library_paths}. {extra_warning_msg_install_guide}", + warning_type=WarningType.MissingDependency, ) name_local = supported[1] if extra_warning_msg_with_default_choice != "": diff --git a/intel_extension_for_pytorch/cpu/launch/launcher_distributed.py b/intel_extension_for_pytorch/cpu/launch/launcher_distributed.py index 9b41e3e31..877ea16a7 100644 --- a/intel_extension_for_pytorch/cpu/launch/launcher_distributed.py +++ b/intel_extension_for_pytorch/cpu/launch/launcher_distributed.py @@ -4,6 +4,7 @@ import os import psutil from .launcher_base import Launcher +from ...utils._logger import WarningType class DistributedTrainingLauncher(Launcher): @@ -107,7 +108,10 @@ def get_pin_domain_affinity( ): self.verbose( "warning", - "Argument --logical-cores-for-ccl is set but no enough logical cores are available. Disable this argument.", + "Argument --logical-cores-for-ccl is set but not enough logical cores are available. Disabling this argument. "
+ + "please see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/launch_script.html#launch-script-usage-guide" # noqa: B950 + + "for usage guide", + warning_type=WarningType.WrongArgument, ) logical_cores_for_ccl = False break @@ -296,6 +300,7 @@ def launch(self, args): self.verbose( "warning", f'Failed to detect rank id from log file {log_name} at line "{line.strip()}".', + warning_type=WarningType.NotSupported, ) for fn in log_fns: fn.close() diff --git a/intel_extension_for_pytorch/cpu/launch/launcher_multi_instances.py b/intel_extension_for_pytorch/cpu/launch/launcher_multi_instances.py index 820a13377..bfadbdc0f 100644 --- a/intel_extension_for_pytorch/cpu/launch/launcher_multi_instances.py +++ b/intel_extension_for_pytorch/cpu/launch/launcher_multi_instances.py @@ -3,6 +3,7 @@ import os import intel_extension_for_pytorch.cpu.auto_ipex as auto_ipex from .launcher_base import Launcher +from ...utils._logger import WarningType class MultiInstancesLauncher(Launcher): @@ -192,8 +193,9 @@ def launch(self, args): ): self.verbose( "warning", - "--latency-mode is exclusive to --ninstances, --ncores-per-instance, --nodes-list and \ - --use-logical-cores. They won't take effect even if they are set explicitly.", + "--latency-mode is exclusive to --ninstances, --ncores-per-instance, --nodes-list and" + + "--use-logical-cores. They won't take effect even if they are set explicitly.", + warning_type=WarningType.AmbiguousArgument, ) args.ncores_per_instance = 4 args.ninstances = 0 @@ -207,8 +209,9 @@ def launch(self, args): ): self.verbose( "warning", - "--throughput-mode is exclusive to --ninstances, --ncores-per-instance, --nodes-list and \ - --use-logical-cores. They won't take effect even if they are set explicitly.", + "--throughput-mode is exclusive to --ninstances, --ncores-per-instance, --nodes-list and" + + "--use-logical-cores. They won't take effect even if they are set explicitly.", + warning_type=WarningType.AmbiguousArgument, ) args.ninstances = len(set([c.node for c in self.cpuinfo.pool_all])) args.ncores_per_instance = 0 diff --git a/intel_extension_for_pytorch/cpu/nn/_embeddingbag.py b/intel_extension_for_pytorch/cpu/nn/_embeddingbag.py index 4df232877..fc0f4c230 100644 --- a/intel_extension_for_pytorch/cpu/nn/_embeddingbag.py +++ b/intel_extension_for_pytorch/cpu/nn/_embeddingbag.py @@ -1,7 +1,7 @@ import torch -import warnings import intel_extension_for_pytorch._C as core from typing import Optional, Tuple +import warnings Tensor = torch.Tensor @@ -14,16 +14,22 @@ def _embedding_bag_fast_path_sum( scale_grad_by_freq: bool = False, per_sample_weights: Optional[Tensor] = None, padding_idx: Optional[int] = None, -) -> bool: +) -> Tuple[bool, str]: if indices.dtype != torch.int64 or offsets.dtype != torch.int64: - return False + return False, "IPEX embeddingbag only support int32 offsets/indices." if mode != 0 or scale_grad_by_freq: - return False + return ( + False, + "IPEX embeddingbag only support mode='sum' and scale_grad_by_freq=False.", + ) if weights.stride(1) != 1 or weights.dtype not in (torch.float, torch.bfloat16): - return False + return False, "IPEX embeddingbag only support fp32/bf16 weights." 
if per_sample_weights is not None or padding_idx is not None: - return False - return True + return ( + False, + "IPEX embeddingbag only supports per_sample_weights/padding_idx = None.", + ) + return True, "supported" torch_embedding_bag = torch.embedding_bag @@ -91,7 +97,7 @@ def _embeddingbag( include_last_offset: bool = False, padding_idx: Optional[int] = None, ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - if _embedding_bag_fast_path_sum( + supported, msg = _embedding_bag_fast_path_sum( weights, indices, offsets, @@ -99,7 +105,8 @@ def _embeddingbag( scale_grad_by_freq, per_sample_weights, padding_idx, - ): + ) + if supported: ret = torch.ops.torch_ipex.embedding_bag( weights, indices, offsets, sparse, include_last_offset ) @@ -107,7 +114,22 @@ def _embeddingbag( # here we only return 1 tensor since the other three tensors are not needed in our fast path ret = (ret, torch.empty(0), torch.empty(0), torch.empty(0)) else: - warnings.warn("Fallback to torch.embedding bag") + r""" + Cannot use the Python logger here because torch.jit.script cannot compile it: + File "torch/jit/_script.py", line 1395, in script + fn = torch._C._jit_script_compile( + RuntimeError: + attribute lookup is not defined on python value of type 'Logger': + File "intel_extension_for_pytorch/cpu/nn/_embeddingbag.py", line 116 + ret = (ret, torch.empty(0), torch.empty(0), torch.empty(0)) + else: + logger.warning( + ~~~~~~~~~~~~~~ <--- HERE + msg + " Fallback to torch.embedding_bag.", _type=WarningType.NotSupported + ) + '_embeddingbag' is being compiled since it was called from 'wrapper' + """ + warnings.warn("[NotSupported]" + msg + " Fallback to torch.embedding_bag.") ret = torch_embedding_bag( weights, indices, diff --git a/intel_extension_for_pytorch/cpu/runtime/cpupool.py b/intel_extension_for_pytorch/cpu/runtime/cpupool.py index 5c9f97027..693f17834 100644 --- a/intel_extension_for_pytorch/cpu/runtime/cpupool.py +++ b/intel_extension_for_pytorch/cpu/runtime/cpupool.py @@ -1,7 +1,7 @@ import functools -import warnings import intel_extension_for_pytorch as ipex from .runtime_utils import get_core_list_of_node_id +from ...utils._logger import logger, WarningType class CPUPool(object): @@ -23,8 +23,10 @@ def __init__(self, core_ids: list = None, node_id: int = None): return if core_ids is not None: if node_id is not None: - warnings.warn( + logger.warning( "Both of core_ids and node_id are inputed. core_ids will be used with priority." + + " You can eliminate this warning by passing only one of them.", + _type=WarningType.AmbiguousArgument, ) if type(core_ids) is range: core_ids = list(core_ids) diff --git a/intel_extension_for_pytorch/cpu/runtime/multi_stream.py b/intel_extension_for_pytorch/cpu/runtime/multi_stream.py index 753737287..74c0ebb8f 100644 --- a/intel_extension_for_pytorch/cpu/runtime/multi_stream.py +++ b/intel_extension_for_pytorch/cpu/runtime/multi_stream.py @@ -5,7 +5,7 @@ from .cpupool import CPUPool from .task import Task import copy -import warnings +from ...utils._logger import logger, WarningType class MultiStreamModuleHint(object): @@ -103,9 +103,10 @@ def __init__( type(cpu_pool) is CPUPool ), "Input of cpu_pool must be provided with type of ipex.cpu.runtime.CPUPool" if not isinstance(model, torch.jit.ScriptModule): - warnings.warn( + logger.warning( "Creating MultiStreamModule on an nn.Module. This can be slow due " - "to Python Global Interpreter Lock (GIL). Suggest to use JIT ScriptModule for better performance." + + "to Python Global Interpreter Lock (GIL).
Suggest to use JIT ScriptModule for better performance.", + _type=WarningType.WrongArgument, ) self.cpu_pool = cpu_pool self.core_list = cpu_pool.core_ids @@ -127,10 +128,9 @@ def __init__( if self.num_streams > self.core_list.__len__(): self.num_streams = self.core_list.__len__() - warnings.warn( - "The number of streams is larger than number of cores. The number of streams changes to {}.".format( - self.num_streams - ) + logger.warning( + f"The number of streams is larger than number of cores. The number of streams changes to {self.num_streams}.", + _type=WarningType.WrongArgument, ) if self.num_streams == 1: @@ -502,7 +502,7 @@ def forward(self, *args, **kwargs): results_raw_future.append( self.tasks[stream_id]( *(self.args_streams_input[stream_id]), - **(self.kwargs_streams_input[stream_id]) + **(self.kwargs_streams_input[stream_id]), ) ) @@ -563,10 +563,9 @@ def __init__( if self.num_streams > self.core_list.__len__(): self.num_streams = self.core_list.__len__() - warnings.warn( - "The number of streams is larger than number of cores. The number of streams changes to {}.".format( - self.num_streams - ) + logger.warning( + f"The number of streams is larger than number of cores. The number of streams changes to {self.num_streams}.", + _type=WarningType.WrongArgument, ) if self.num_streams == 1: diff --git a/intel_extension_for_pytorch/cpu/tpp/__init__.py b/intel_extension_for_pytorch/cpu/tpp/__init__.py index 6e60cbc27..d0ea959df 100644 --- a/intel_extension_for_pytorch/cpu/tpp/__init__.py +++ b/intel_extension_for_pytorch/cpu/tpp/__init__.py @@ -1,5 +1,4 @@ import pkg_resources -import warnings from . import fused_bert from . import utils from . import optim diff --git a/intel_extension_for_pytorch/cpu/tpp/fused_bert.py b/intel_extension_for_pytorch/cpu/tpp/fused_bert.py index d57c5125a..03a513a08 100644 --- a/intel_extension_for_pytorch/cpu/tpp/fused_bert.py +++ b/intel_extension_for_pytorch/cpu/tpp/fused_bert.py @@ -7,10 +7,10 @@ get_blocking_signature, ) import pkg_resources -import warnings from .optim import AdamW, SGD import intel_extension_for_pytorch._C as torch_ipex_cpp import copy +from ...utils._logger import logger, WarningType try: from transformers.modeling_utils import apply_chunking_to_forward @@ -1047,7 +1047,8 @@ def forward( if use_cache: logger.warning( "`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting " - "`use_cache=False`..." + + "`use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -1295,7 +1296,7 @@ def fast_bert(model, dtype=torch.float, optimizer=None, unpad=False): torch.tensor(torch.initial_seed()).to(torch.int32).abs().item() ) except BaseException: - warnings.warn( + logger.warning( "Set seed failed for libxsmm which may impact the training loss, you can call " + "torch.manual_seed(N) before invoking fast_bert." 
) @@ -1334,8 +1335,9 @@ def fast_bert(model, dtype=torch.float, optimizer=None, unpad=False): ) new_model.bert.encoder = BertEncoder(model.bert.config) else: - warnings.warn( - "fast_bert only supports instance of transformers.models.bert.modeling_bert.BertModel" + logger.warning( + "fast_bert only supports instance of transformers.models.bert.modeling_bert.BertModel", + _type=WarningType.NotSupported, ) return model, optimizer new_model.load_state_dict( @@ -1343,9 +1345,10 @@ def fast_bert(model, dtype=torch.float, optimizer=None, unpad=False): ) # copy the original params into the tpp module block(new_model) # get block format weights/bias if optimizer is None: - warnings.warn( + logger.warning( "Currently ipex.fast_bert API is well optimized for training tasks. It works for inference tasks, " - + "though, please use the ipex.optimize API with TorchScript to achieve the peak performance." + + "though, please use the ipex.optimize API with TorchScript to achieve the peak performance.", + _type=WarningType.NotSupported, ) return new_model # replace the original pytorch/transformer optimizer with tpp optimizer for SGD/AdamW @@ -1354,8 +1357,9 @@ def fast_bert(model, dtype=torch.float, optimizer=None, unpad=False): for param_ori, param_tpp in zip(model.parameters(), new_model.parameters()): param_pair[param_ori] = param_tpp if type(optimizer) not in PT_OPTIMIZER_TO_TPP_OPTIMIZER: - warnings.warn( - "Still return the origin optimize, the fast_bert can only replace the SGD, AdamW optimizer" + logger.warning( + "Still return the origin optimize, the fast_bert can only replace the SGD, AdamW optimizer", + _type=WarningType.NotSupported, ) new_optimizer = optimizer else: diff --git a/intel_extension_for_pytorch/frontend.py b/intel_extension_for_pytorch/frontend.py index 0aa835232..29450a35e 100644 --- a/intel_extension_for_pytorch/frontend.py +++ b/intel_extension_for_pytorch/frontend.py @@ -1,6 +1,5 @@ # This Python file uses the following encoding: utf-8 import copy -import warnings import torch import torch._dynamo @@ -36,6 +35,7 @@ from .fx.concat_linear import _concat_linear import intel_extension_for_pytorch._C as core +from .utils._logger import logger, WarningType, warn_if_user_explicitly_set def _copy_model_and_optimizer(model, optimizer): @@ -405,41 +405,43 @@ def xpu_check_channel_last(): # when on xpu, some features are not supported if device_type == "xpu": if opt_properties.auto_kernel_selection: - warnings.warn( - "For XPU device, the auto kernel selection is unsupported, so disable it." - ) opt_properties.auto_kernel_selection = False + msg = "For XPU device, the auto kernel selection is unsupported, so disable it." 
+ warn_if_user_explicitly_set(auto_kernel_selection, msg) if opt_properties.split_master_weight_for_bf16: # currently split master weight for xpu only support sgd - if type(optimizer) is torch.optim.SGD: - opt_properties.split_master_weight_for_bf16 = True - else: + if type(optimizer) != torch.optim.SGD: + msg = "Currently split master weight for xpu only support sgd" opt_properties.split_master_weight_for_bf16 = False + warn_if_user_explicitly_set(split_master_weight_for_bf16, msg) + if opt_properties.graph_mode: - warnings.warn( + opt_properties.graph_mode = False + msg = ( "For XPU, the oob solution for inference is to trace model outside of the torch.xpu.optimize," + " so temp to disable the graph mode" ) - opt_properties.graph_mode = False + warn_if_user_explicitly_set(graph_mode, msg) if not inplace: - warnings.warn( + inplace = True + msg = ( "For XPU device to save valuable device memory, temp to do optimization on inplaced model," + " so make inplace to be true" ) - inplace = True + warn_if_user_explicitly_set(not inplace, msg) # for XPU, weight prepack is unsupported, so sample input is useless if opt_properties.weights_prepack: - warnings.warn( + msg = ( "For XPU, the weight prepack and sample input are disabled. The onednn layout" + " is automatically chosen to use" ) opt_properties.weights_prepack = False sample_input = None + warn_if_user_explicitly_set(weights_prepack, msg) if opt_properties.optimize_lstm is not None: - warnings.warn( - "For XPU, the optimize_lstm(replace lstm with ipex_lstm) is unsupported, so disable it" - ) + msg = "For XPU, the optimize_lstm(replace lstm with ipex_lstm) is unsupported, so disable it" opt_properties.optimize_lstm = False + warn_if_user_explicitly_set(optimize_lstm, msg) if inplace: optimized_model = model @@ -459,16 +461,14 @@ def xpu_check_channel_last(): try: optimized_model = optimization.fuse(optimized_model, inplace=True) except: # noqa E722 - warnings.warn( - "Conv BatchNorm folding failed during the optimize process." - ) + msg = "Conv BatchNorm folding failed during the optimize process." + warn_if_user_explicitly_set(conv_bn_folding, msg) if opt_properties.linear_bn_folding: try: optimized_model = linear_bn_fuse(optimized_model, inplace=True) except BaseException: - warnings.warn( - "Linear BatchNorm folding failed during the optimize process." - ) + msg = "Linear BatchNorm folding failed during the optimize process." + warn_if_user_explicitly_set(linear_bn_folding, msg) if opt_properties.replace_dropout_with_identity: utils._model_convert.replace_dropout_with_identity(optimized_model) if opt_properties.concat_linear: @@ -492,34 +492,41 @@ def xpu_check_channel_last(): ): if not opt_properties.fuse_update_step: opt_properties.split_master_weight_for_bf16 = False - warnings.warn( + msg = ( "IPEX does not non-fused split master weight for bf16 training, " + "have reset split_master_weight_for_bf16 flag to False. " + "If you want to use split_master_weight_for_bf16. " + "Please set both split_master_weight_for_bf16 and fuse_update_step to True." ) + warn_if_user_explicitly_set(split_master_weight_for_bf16, msg) elif ( type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST_CPU and device_type == "cpu" ): opt_properties.split_master_weight_for_bf16 = False opt_properties.fuse_update_step = False - warnings.warn( + msg = ( "IPEX CPU does not support fused/fused split update for " + str(type(optimizer)) + " will use non-fused master weight update for bf16 training on CPU." 
) + warn_if_user_explicitly_set( + fuse_update_step or split_master_weight_for_bf16, msg + ) elif ( type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST_XPU and device_type == "xpu" ): opt_properties.split_master_weight_for_bf16 = False opt_properties.fuse_update_step = False - warnings.warn( + msg = ( "IPEX XPU does not support fused/fused split update for " + str(type(optimizer)) + " will use non-fused master weight update for bf16 training on XPU." ) + warn_if_user_explicitly_set( + fuse_update_step or split_master_weight_for_bf16, msg + ) if model.training: if hasattr(optimized_optimizer, "params_attr"): @@ -589,8 +596,8 @@ def xpu_check_channel_last(): if opt_properties.fuse_update_step: optimized_optimizer = optimizer_fusion( optimized_optimizer, - opt_properties.split_master_weight_for_bf16, device_type, + fuse_update_step, ) return optimized_model, optimized_optimizer @@ -666,10 +673,11 @@ def set_fp32_math_mode(mode=FP32MathMode.FP32, device="cpu"): elif mode == FP32MathMode.FP32: core.set_fp32_math_mode(core.FP32MathMode.FP32) else: - warnings.warn( - "For CPU device, IPEX does not support mode except \ - FP32MathMode.FP32 and FP32MathMode.BF32 for fpmath_mode right now." + msg = ( + "For CPU device, IPEX does not support mode except" + + "FP32MathMode.FP32 and FP32MathMode.BF32 for fpmath_mode right now." ) + logger.warning(msg, _type=WarningType.NotSupported) elif device == "xpu": if mode == FP32MathMode.BF32: torch.xpu.set_fp32_math_mode(torch.xpu.FP32MathMode.BF32) @@ -678,10 +686,11 @@ def set_fp32_math_mode(mode=FP32MathMode.FP32, device="cpu"): elif mode == FP32MathMode.TF32: torch.xpu.set_fp32_math_mode(torch.xpu.FP32MathMode.TF32) else: - warnings.warn( - "For XPU device, IPEX does not support mode except \ - FP32MathMode.FP32, FP32MathMode.BF32 and FP32MathMode.TF32 for fpmath_mode right now." + msg = ( + "For XPU device, IPEX does not support mode except" + + "FP32MathMode.FP32, FP32MathMode.BF32 and FP32MathMode.TF32 for fpmath_mode right now." ) + logger.warning(msg, _type=WarningType.NotSupported) else: raise RuntimeError( "Unexpected device type {}. ".format(device) + "Supported are 'cpu', 'xpu'." 
diff --git a/intel_extension_for_pytorch/fx/concat_linear.py b/intel_extension_for_pytorch/fx/concat_linear.py index e0c32c292..39a48f750 100644 --- a/intel_extension_for_pytorch/fx/concat_linear.py +++ b/intel_extension_for_pytorch/fx/concat_linear.py @@ -4,7 +4,7 @@ import torch.fx.experimental.optimization as optimization import _operator import copy -import warnings +from ..utils._logger import logger, WarningType def concat_linear(model: fx.GraphModule, inplace=False) -> fx.GraphModule: @@ -187,7 +187,10 @@ def apply_concat_linear_on_BasicTransformerBlock(BasicTransformerBlock): apply_concat_linear_on_unet(model) return model except BaseException: - warnings.warn("failed to apply concat_linear on unet, please report bugs") + logger.warning( + "Failed to apply concat_linear on the UNet; please report a bug.", + _type=WarningType.NotSupported, + ) if "transformers" in sys.modules: @@ -200,8 +203,9 @@ def is_transfomer_model(model): from transformers.utils.fx import symbolic_trace as hf_symbolic_trace except ImportError: # fx are not exposed in transformers.utils - warnings.warn( - "failed to import transformers symbolic_trace, cannnot apply concat linear" + logger.warning( + "Failed to import transformers symbolic_trace; cannot apply concat linear.", + _type=WarningType.NotSupported, ) try: model: fx.GraphModule = hf_symbolic_trace( @@ -209,15 +213,17 @@ def is_transfomer_model(model): ) return concat_linear(model, inplace) except BaseException: - warnings.warn( - "failed to symbolic trace model with transformers symbolic_trace, cannnot apply concat linear" + logger.warning( + "Failed to symbolically trace the model with transformers symbolic_trace; cannot apply concat linear.", + _type=WarningType.NotSupported, ) else: try: model: fx.GraphModule = fx.symbolic_trace(model) return concat_linear(model, inplace) except BaseException: - warnings.warn( - "pytorch native symbolic trace failed, may cannnot apply concat linear" + logger.warning( + "PyTorch native symbolic trace failed; may not be able to apply concat linear.", + _type=WarningType.NotSupported, ) return model diff --git a/intel_extension_for_pytorch/launcher.py b/intel_extension_for_pytorch/launcher.py index 4c5985ea6..432948e2b 100644 --- a/intel_extension_for_pytorch/launcher.py +++ b/intel_extension_for_pytorch/launcher.py @@ -1,7 +1,7 @@ import sys import argparse -import warnings from functools import partial +from .utils._logger import logger, WarningType from .cpu.launch import ( init_parser as cpu_init_parser, @@ -99,10 +99,11 @@ def main(): and sys.argv[1] != "cpu" and sys.argv[1] != "xpu" ): - warnings.warn( - "Backend is not specified, it will automatically default to cpu.", - UserWarning, + msg = ( + "Backend is not specified, it will automatically default to cpu. "
+ + "Please start with ipexrun python_script args" ) + logger.warning(msg, _type=WarningType.MissingArgument) sys.argv.insert(1, "cpu") args = parser.parse_args() if args.backend == "cpu": diff --git a/intel_extension_for_pytorch/llm/utils.py b/intel_extension_for_pytorch/llm/utils.py index b5931a5fe..ff84b0bfb 100644 --- a/intel_extension_for_pytorch/llm/utils.py +++ b/intel_extension_for_pytorch/llm/utils.py @@ -9,7 +9,7 @@ import os import shutil import typing -import warnings +from ..utils._logger import logger, WarningType from typing import Dict, Optional, Union from transformers.dynamic_module_utils import ( check_imports, @@ -23,12 +23,9 @@ cached_file, extract_commit_hash, is_offline_mode, - logging, try_to_load_from_cache, ) -logger = logging.get_logger(__name__) # pylint: disable=invalid-name - def _get_relative_imports(module_file): with open(module_file, "r", encoding="utf-8") as f: @@ -181,9 +178,9 @@ def _get_cached_module_file( """ use_auth_token = deprecated_kwargs.pop("use_auth_token", None) if use_auth_token is not None: - warnings.warn( + logger.warning( "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", - FutureWarning, + _type=WarningType.DeprecatedArgument, ) if token is not None: raise ValueError( @@ -299,8 +296,8 @@ def _get_cached_module_file( url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}" logger.warning( f"A new version of the following files was downloaded from {url}:\n{new_files}" - "\n. Make sure to double-check they do not contain any added malicious code. To avoid downloading new " - "versions of the code file, you can pin a revision." + + "\n. Make sure to double-check they do not contain any added malicious code. To avoid downloading new " + + "versions of the code file, you can pin a revision." ) return os.path.join(full_submodule, module_file) @@ -396,9 +393,9 @@ def _get_class_from_dynamic_module( ```""" use_auth_token = kwargs.pop("use_auth_token", None) if use_auth_token is not None: - warnings.warn( + logger.warning( "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", - FutureWarning, + _type=WarningType.DeprecatedArgument, ) if token is not None: raise ValueError( diff --git a/intel_extension_for_pytorch/nn/functional/_tensor_method.py b/intel_extension_for_pytorch/nn/functional/_tensor_method.py index 27068f809..3f5a8cccd 100644 --- a/intel_extension_for_pytorch/nn/functional/_tensor_method.py +++ b/intel_extension_for_pytorch/nn/functional/_tensor_method.py @@ -1,11 +1,11 @@ -import warnings import torch from torch.overrides import has_torch_function_unary, handle_torch_function +from ...utils._logger import logger def _numpy(x): if x.dtype == torch.bfloat16: - warnings.warn( + logger.warning( "calling in ipex numpy which is not share memory with torch tensor for bfloat16 input." 
) return torch._C._TensorBase.numpy(x.float()) diff --git a/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py b/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py index b63cd860b..a5fc248a0 100644 --- a/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py +++ b/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py @@ -11,6 +11,7 @@ quantize_per_block, WoqWeightDtype, ) +from ...utils._logger import logger, WarningType class WeightOnlyQuantizedLinear(nn.Module): @@ -97,9 +98,10 @@ def from_float(cls, mod, scales=None, zero_points=None): # Fall back to lowp_mode=2 in such case # TODO(Weiwen) Support lowp_mode=3 lowp_mode = 2 - print( + logger.warning( "Warning: lowp_mode=3(INT8) is not supported yet for INT8 weight. " - "Falling back to 2(BF16)." + + "Falling back to 2(BF16).", + _type=WarningType.NotSupported, ) act_quant_mode = qconfig.act_quant_mode dtype = qconfig.weight_dtype diff --git a/intel_extension_for_pytorch/nn/utils/_parameter_wrapper.py b/intel_extension_for_pytorch/nn/utils/_parameter_wrapper.py index d49dffd60..25d10ba77 100644 --- a/intel_extension_for_pytorch/nn/utils/_parameter_wrapper.py +++ b/intel_extension_for_pytorch/nn/utils/_parameter_wrapper.py @@ -3,7 +3,7 @@ import functools import contextlib import types -import warnings +from ...utils._logger import logger, WarningType from intel_extension_for_pytorch.cpu._auto_kernel_selection import ( _using_dnnl, _using_tpp, @@ -302,8 +302,9 @@ def can_cast_inference(self, dtype): return True ori_dtype = self.parameter.dtype if ori_dtype not in (torch.float, torch.float32, torch.bfloat16, torch.float16): - warnings.warn( - f"WARNING: Can't convert model's parameters dtype from {ori_dtype} to {dtype}" + logger.warning( + f"Can't convert model's parameters dtype from {ori_dtype} to {dtype}", + _type=WarningType.NotSupported, ) return False module_cls = IPEX_WEIGHT_CONVERT_MODULE_CPU(True, dtype) @@ -329,8 +330,9 @@ def can_cast_training(self, dtype): torch.float, torch.float32, ): - warnings.warn( - f"WARNING: Can't convert model's parameters dtype from {ori_dtype} to {dtype}" + logger.warning( + f"Can't convert model's parameters dtype from {ori_dtype} to {dtype}", + _type=WarningType.NotSupported, ) return False module_cls = IPEX_WEIGHT_CONVERT_MODULE_CPU(False, dtype) diff --git a/intel_extension_for_pytorch/optim/_optimizer_utils.py b/intel_extension_for_pytorch/optim/_optimizer_utils.py index fcf3c6260..c9ed97949 100644 --- a/intel_extension_for_pytorch/optim/_optimizer_utils.py +++ b/intel_extension_for_pytorch/optim/_optimizer_utils.py @@ -1,7 +1,7 @@ import torch import copy import types -import warnings +from ..utils._logger import warn_if_user_explicitly_set from copy import deepcopy from itertools import chain from collections import defaultdict @@ -358,7 +358,7 @@ def get_optimizer_unpacked_state_dict(self): ) -def optimizer_fusion(optimizer, master_weight_split, device_type): +def optimizer_fusion(optimizer, device_type, user_explict_fuse): r""" Patch "step" method to choose IPEX optimized fused update kernel. """ @@ -371,20 +371,22 @@ def optimizer_fusion(optimizer, master_weight_split, device_type): elif device_type == "xpu": step = OPTIMIZER_FUSED_STEP_MAPPING_XPU[type(optimizer)] else: - warnings.warn( + msg = ( "IPEX does not support device type " + str(device_type) + ". For now, only support CPU, XPU." 
) + warn_if_user_explicitly_set(user_explict_fuse, msg) return optimizer if not hasattr(optimizer, "_original_step"): setattr(optimizer, "_original_step", optimizer.step) # noqa: B010 optimizer.step = types.MethodType(step, optimizer) setattr(optimizer, "fused", True) # noqa: B010 except KeyError: - warnings.warn( + msg = ( "Does not suport fused step for " + str(type(optimizer)) + ", will use non-fused step" ) + warn_if_user_explicitly_set(user_explict_fuse, msg) return optimizer diff --git a/intel_extension_for_pytorch/quantization/_GPTQ/_quantize.py b/intel_extension_for_pytorch/quantization/_GPTQ/_quantize.py index 65c016dd0..19d7bfe2a 100644 --- a/intel_extension_for_pytorch/quantization/_GPTQ/_quantize.py +++ b/intel_extension_for_pytorch/quantization/_GPTQ/_quantize.py @@ -1,10 +1,7 @@ import logging import torch from pathlib import Path - -format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" -logging.basicConfig(level=logging.INFO, format=format_str) -logger = logging.getLogger(__name__) +from ...utils._logger import logger, WarningType @torch.no_grad() @@ -67,8 +64,10 @@ def gptq( } if use_max_length and pad_max_length is None: logger.warning( - "You choose to use unified sequence length for calibration, \ - but you have not set length value. Default sequence length is 2048 and this might cause inference error!" + "You choose to use a unified sequence length for calibration, " + + "but you have not set the length value. The default sequence length " + + "is 2048 and this might cause inference errors!", + _type=WarningType.WrongArgument, ) model, gptq_config = gptq_quantize( model, diff --git a/intel_extension_for_pytorch/quantization/_GPTQ/gptq/gptq.py b/intel_extension_for_pytorch/quantization/_GPTQ/gptq/gptq.py index 9ff1ddb64..f6a8a3a75 100644 --- a/intel_extension_for_pytorch/quantization/_GPTQ/gptq/gptq.py +++ b/intel_extension_for_pytorch/quantization/_GPTQ/gptq/gptq.py @@ -1,4 +1,4 @@ -import logging +from ....utils._logger import logger, WarningType import math import random import re @@ -15,10 +15,6 @@ quantize, ) -format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" -logging.basicConfig(level=logging.INFO, format=format_str) -logger = logging.getLogger(__name__) - DEBUG = False @@ -100,19 +96,15 @@ def prepare_dataloader(self): else: # general selection, no padding, not GPTQ original implementation. self.obtain_first_n_samples() - try: - self.cache_key_arguments = { - "i": 0 - } # a dict of list, keyword arguments ("attention_masks", "position_ids", etc.) - # Note that the first elements in cache_positional_arguments is main input: hidden_states - if self.cache_positional_arguments is None: - self.cache_positional_arguments = ( - [] - ) # a list of list, positional arguments ("rotary_pos_emb" in chatglm) - self.is_ready = True - except Exception: - logger.warning("GPTQ Quantizer initialization failed!") - pass + self.cache_key_arguments = { + "i": 0 + } # a dict of list, keyword arguments ("attention_masks", "position_ids", etc.)
+ # Note that the first elements in cache_positional_arguments is main input: hidden_states + if self.cache_positional_arguments is None: + self.cache_positional_arguments = ( + [] + ) # a list of list, positional arguments ("rotary_pos_emb" in chatglm) + self.is_ready = True def obtain_first_n_samples(self, seed=0): """Get first nsample data as the real calibration dataset.""" @@ -142,7 +134,8 @@ def obtain_first_n_samples(self, seed=0): length = batch["input_ids"].shape[-1] except Exception: logger.warning( - "Please make sure your dict'like data contains key of 'input_ids'." + "Please make sure your dict-like data contains the key 'input_ids'.", + _type=WarningType.WrongArgument, ) continue batch_final = {} @@ -170,7 +163,8 @@ def obtain_first_n_samples(self, seed=0): self.dataloader.append(batch_final) if len(self.dataloader) < self.nsamples: logger.warning( - f"Try to use {self.nsamples} data, but entire dataset size is {len(self.dataloader)}." + f"Tried to use {self.nsamples} samples, but the entire dataset size is only {len(self.dataloader)}.", + _type=WarningType.WrongArgument, ) def obtain_first_n_samples_fulllength(self, seed=0): @@ -203,7 +197,8 @@ def obtain_first_n_samples_fulllength(self, seed=0): length = batch["input_ids"].shape[-1] except Exception: logger.warning( - "Please make sure your dict'like data contains key of 'input_ids'." + "Please make sure your dict-like data contains the key 'input_ids'.", + _type=WarningType.WrongArgument, ) continue batch_final = {} @@ -237,8 +232,9 @@ def obtain_first_n_samples_fulllength(self, seed=0): self.dataloader.append(batch_final) if len(self.dataloader) < self.nsamples: # pragma: no cover logger.warning( - f"Trying to allocate {self.nsamples} data with fixed length {unified_length}, \ - but only {len(self.dataloader)} samples are found. Please use smaller 'self.pad_max_length' value." + f"Trying to allocate {self.nsamples} samples with fixed length {unified_length}, " + + f"but only {len(self.dataloader)} samples are found. Please use a smaller 'pad_max_length' value.", + _type=WarningType.WrongArgument, ) def get_full_layer_name(self, sub_layer_name, block_idx): diff --git a/intel_extension_for_pytorch/quantization/_autotune.py b/intel_extension_for_pytorch/quantization/_autotune.py index 2f9d4cdf1..0dce28d9f 100644 --- a/intel_extension_for_pytorch/quantization/_autotune.py +++ b/intel_extension_for_pytorch/quantization/_autotune.py @@ -4,7 +4,7 @@ import sys import copy import json -import logging +from ..utils._logger import logger import subprocess import torch import time @@ -103,8 +103,8 @@ def autotune( try: example_inputs = get_example_inputs(model, calib_dataloader) except Exception: - logging.info( - "UserWarning: Wrong dataloader format. Please refer to autotune doc. Aborting..." + logger.critical( + "Wrong dataloader format. Please refer to autotune doc. Aborting..."
) exit() diff --git a/intel_extension_for_pytorch/quantization/_quantize.py b/intel_extension_for_pytorch/quantization/_quantize.py index 2f756d7e9..a37c055e7 100644 --- a/intel_extension_for_pytorch/quantization/_quantize.py +++ b/intel_extension_for_pytorch/quantization/_quantize.py @@ -1,6 +1,6 @@ import copy import functools -import warnings +from ..utils._logger import logger, WarningType import torch from torch.ao.quantization import PlaceholderObserver, QConfig, QConfigMapping @@ -59,11 +59,12 @@ def prepare( configure, QConfig ), f"IPEX quantization: prepare configure should be an instance of QConfigMapping or QConfig, but got {type(configure)}" if isinstance(configure, QConfig): - warnings.warn( + logger.warning( "\nIPEX quantization: QConfig are deprecated. Please use QConfigMapping instead.\nUsage:" - "\n qconfig_mapping = ipex.quantization.default_static_qconfig_mapping # for static quantization" - "\n qconfig_mapping = ipex.quantization.default_dynamic_qconfig_mapping # for dynamic quantization" - "\n prepared_model = ipex.quantization.prepare(model_fp32, qconfig_mapping, ...)" + + "\n qconfig_mapping = ipex.quantization.default_static_qconfig_mapping # for static quantization" + + "\n qconfig_mapping = ipex.quantization.default_dynamic_qconfig_mapping # for dynamic quantization" + + "\n prepared_model = ipex.quantization.prepare(model_fp32, qconfig_mapping, ...)", + _type=WarningType.AmbiguousArgument, ) if isinstance(configure, QConfigMapping): configure = configure.global_qconfig @@ -94,7 +95,10 @@ def prepare( prepare_model = optimization.fuse(prepare_model, inplace=inplace) prepare_model = linear_bn_fuse(prepare_model, inplace=inplace) except BaseException: - warnings.warn("BatchNorm folding failed during the prepare process.") + logger.warning( + "BatchNorm folding failed during the prepare process.", + _type=WarningType.NotSupported, + ) # replace dropout with identity to enable more fusion pattern. 
nn.utils._model_convert.replace_dropout_with_identity(prepare_model) diff --git a/intel_extension_for_pytorch/transformers/generation/beam_search.py b/intel_extension_for_pytorch/transformers/generation/beam_search.py index 6ae79b036..0c676eabf 100644 --- a/intel_extension_for_pytorch/transformers/generation/beam_search.py +++ b/intel_extension_for_pytorch/transformers/generation/beam_search.py @@ -1,7 +1,7 @@ import torch from torch import nn import torch.distributed as dist -import warnings +from ...utils._logger import logger, WarningType from typing import Optional, Tuple, Union, List from transformers.generation.stopping_criteria import ( StoppingCriteriaList, @@ -65,16 +65,16 @@ def _beam_search( stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() ) if max_length is not None: - warnings.warn( + logger.warning( "`max_length` is deprecated in this function, use" " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", - UserWarning, + _type=WarningType.DeprecatedArgument, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) if len(stopping_criteria) == 0: - warnings.warn( - "You don't have defined any stopping_criteria, this will likely loop forever", - UserWarning, + logger.warning( + "You have not defined any stopping_criteria, this will likely loop forever", + _type=WarningType.WrongArgument, ) pad_token_id = ( pad_token_id diff --git a/intel_extension_for_pytorch/transformers/generation/greedy_search.py b/intel_extension_for_pytorch/transformers/generation/greedy_search.py index ca19a5404..4e6c2f6fe 100644 --- a/intel_extension_for_pytorch/transformers/generation/greedy_search.py +++ b/intel_extension_for_pytorch/transformers/generation/greedy_search.py @@ -1,6 +1,6 @@ import torch import torch.distributed as dist -import warnings +from ...utils._logger import logger, WarningType from typing import Optional, Tuple, Union, List from transformers.generation.stopping_criteria import ( StoppingCriteriaList, @@ -62,10 +62,10 @@ def _greedy_search( stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() ) if max_length is not None: - warnings.warn( + logger.warning( "`max_length` is deprecated in this function, use" " `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.", - UserWarning, + _type=WarningType.DeprecatedArgument, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) pad_token_id = ( diff --git a/intel_extension_for_pytorch/transformers/models/cpu/fusions/linear_fusion.py b/intel_extension_for_pytorch/transformers/models/cpu/fusions/linear_fusion.py index 967eb5ef1..06e01bd66 100644 --- a/intel_extension_for_pytorch/transformers/models/cpu/fusions/linear_fusion.py +++ b/intel_extension_for_pytorch/transformers/models/cpu/fusions/linear_fusion.py @@ -1,7 +1,7 @@ import torch from torch import nn import math -import warnings +from .....utils._logger import logger, WarningType from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear from intel_extension_for_pytorch.quantization import ( get_weight_only_quant_qconfig_mapping, @@ -270,10 +270,11 @@ def __init__(self, module, tpp=False, woq=False): for i in range(self.num_concat): linear = self.linear_list[i] if not hasattr(linear, "_op_context"): - warnings.warn( + logger.warning( "Concat linear fusion for CPU WOQ failed " - "because linear is not converted to WOQ Linear. " - "Falling back to separate linears." 
+ + "because linear is not converted to WOQ Linear. " + + "Falling back to separate linears.", + _type=WarningType.NotSupported, ) weights_list = [] break diff --git a/intel_extension_for_pytorch/transformers/models/reference/models.py b/intel_extension_for_pytorch/transformers/models/reference/models.py index ec6fa806a..cc320246b 100644 --- a/intel_extension_for_pytorch/transformers/models/reference/models.py +++ b/intel_extension_for_pytorch/transformers/models/reference/models.py @@ -10,8 +10,7 @@ BaseModelOutput, ) -from transformers.utils import logging -import warnings +from ....utils._logger import logger, WarningType import transformers try: @@ -29,7 +28,14 @@ ) except ImportError: pass -logger = logging.get_logger(__name__) + +# https://github.com/huggingface/transformers/blob/b647acdb53d251cec126b79e505bac11821d7c93/src/transformers/models/t5/modeling_t5.py#L1336 # noqa: B950 +__HEAD_MASK_WARNING_MSG = """ +The input argument `head_mask` was split into two arguments `head_mask` and `decoder_head_mask`. Currently, +`decoder_head_mask` is set to copy `head_mask`, but this feature is deprecated and will be removed in future versions. +If you do not want to use any `decoder_head_mask` now, please set `decoder_head_mask = torch.ones(num_layers, +num_heads)`. +""" def GPTJForCausalLM_forward( @@ -176,7 +182,8 @@ def LlamaModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -440,10 +447,10 @@ def BloomForCausalLM_forward( are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]` """ if deprecated_arguments.pop("position_ids", False) is not False: - warnings.warn( + logger.warning( "`position_ids` have no functionality in BLOOM and will be removed in v5.0.0. You can safely ignore" " passing `position_ids`.", - FutureWarning, + _type=WarningType.DeprecatedArgument, ) if len(deprecated_arguments) > 0: raise ValueError(f"Got unexpected arguments: {deprecated_arguments}") @@ -725,7 +732,8 @@ def BaichuanModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -951,7 +959,8 @@ def GLMTransformer_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -1344,7 +1353,9 @@ def T5ForConditionalGeneration_forward( # FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask if head_mask is not None and decoder_head_mask is None: if self.config.num_layers == self.config.num_decoder_layers: - warnings.warn(__HEAD_MASK_WARNING_MSG, FutureWarning) + logger.warning( + __HEAD_MASK_WARNING_MSG, _type=WarningType.DeprecatedArgument + ) decoder_head_mask = head_mask # Encode if needed (training, first prediction pass) @@ -1566,7 +1577,8 @@ def MistralModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -1888,7 +1900,8 @@ def MixtralModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -2073,7 +2086,8 @@ def StableLMEpochModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -2303,7 +2317,8 @@ def QWenModel_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False @@ -2444,7 +2459,8 @@ def GitEncoder_forward( if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...", + _type=WarningType.WrongArgument, ) use_cache = False diff --git a/intel_extension_for_pytorch/transformers/models/reference/modules/decoder.py b/intel_extension_for_pytorch/transformers/models/reference/modules/decoder.py index ff93f3f6e..5352f7779 100644 --- a/intel_extension_for_pytorch/transformers/models/reference/modules/decoder.py +++ b/intel_extension_for_pytorch/transformers/models/reference/modules/decoder.py @@ -11,7 +11,7 @@ _IPEXlinearSiluMulRef, ) from torch.nn import functional as F -import warnings +from .....utils._logger import logger, WarningType def LlamaDecoderLayer_forward( @@ -704,8 +704,9 @@ def MixtralDecoderLayer_forward( **kwargs, ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: if "padding_mask" in kwargs: - warnings.warn( - "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`" + logger.warning( + "Passing `padding_mask` is deprecated and will be removed in v4.37. 
Please make sure use `attention_mask` instead.`", + _type=WarningType.DeprecatedArgument, ) """ Args: diff --git a/intel_extension_for_pytorch/transformers/optimize.py b/intel_extension_for_pytorch/transformers/optimize.py index 503decd3d..bf4e48096 100644 --- a/intel_extension_for_pytorch/transformers/optimize.py +++ b/intel_extension_for_pytorch/transformers/optimize.py @@ -1,6 +1,6 @@ import torch import copy -import warnings +from ..utils._logger import logger, WarningType import pkg_resources from intel_extension_for_pytorch.cpu._auto_kernel_selection import ( _enable_tpp, @@ -1017,8 +1017,9 @@ def optimize( if isinstance(model, torch.jit.ScriptModule): return model if model.training or optimizer is not None: - warnings.warn( - "fail to apply ipex.llm.optimize, this API supports inference for now, fallback to default path" + logger.warning( + "fail to apply ipex.llm.optimize, this API supports inference for now, fallback to default path", + _type=WarningType.NotSupported, ) return model, optimizer @@ -1046,12 +1047,14 @@ def optimize( ) ) if version.parse(trans_version) > version.parse(validated_version): - warnings.warn( - f"The transformers version is {trans_version}, bigger than validated {validated_version}, may have risks" + logger.warning( + f"The transformers version is {trans_version}, bigger than validated {validated_version}, may have risks", + _type=WarningType.MissingDependency, ) if not hasattr(model, "config"): - warnings.warn( - "Can not check transformers model config to detect its model family, fallback to origin model" + logger.warning( + "Can not check transformers model config to detect its model family, fallback to origin model", + _type=WarningType.NotSupported, ) return model @@ -1076,9 +1079,10 @@ def optimize( "GitForCausalLM", ] if not well_supported_model: - warnings.warn( + logger.warning( "ipex.llm.optimize supports Llama, GPT-J, GPT-Neox, Falcon, OPT, Bloom, CodeGen, Baichuan, ChatGLM, " - + "GPTBigCode, T5, Mistral, Mixtral, MPT, StableLM, QWen, and Git, fallback to origin model" + + "GPTBigCode, T5, Mistral, Mixtral, MPT, StableLM, QWen, and Git, fallback to origin model", + _type=WarningType.NotSupported, ) return model @@ -1202,8 +1206,9 @@ def optimize( return _model except RuntimeError as e: - warnings.warn( - f"fail to apply ipex.llm.optimize due to: {e}, fallback to the origin model" + logger.warning( + f"fail to apply ipex.llm.optimize due to: {e}, fallback to the origin model", + _type=WarningType.NotSupported, ) return model @@ -1222,8 +1227,9 @@ def optimize_transformers( sample_inputs=None, deployment_mode=True, ): - warnings.warn( - "ipex.optimize_transformers API is going to be deprecated, please use ipex.llm.optimize instead." 
+    logger.warning(
+        "ipex.optimize_transformers API is going to be deprecated, please use ipex.llm.optimize instead.",
+        _type=WarningType.DeprecatedArgument,
     )
     return optimize(
         model=model,
diff --git a/intel_extension_for_pytorch/utils/_logger.py b/intel_extension_for_pytorch/utils/_logger.py
new file mode 100644
index 000000000..f31852ea2
--- /dev/null
+++ b/intel_extension_for_pytorch/utils/_logger.py
@@ -0,0 +1,94 @@
+import logging
+import warnings
+import functools
+
+format_str = "%(asctime)s - %(filename)s - %(name)s - %(levelname)s - %(message)s"
+logging.basicConfig(format=format_str)
+
+from enum import Enum
+
+
+class WarningType(Enum):
+    NotSupported = 1
+    MissingDependency = 2
+    MissingArgument = 3
+    WrongArgument = 4
+    DeprecatedArgument = 5
+    AmbiguousArgument = 6
+
+
+UserFixWarning = {
+    WarningType.MissingDependency,
+    WarningType.MissingArgument,
+    WarningType.WrongArgument,
+    WarningType.AmbiguousArgument,
+}
+
+WarningType2Prefix = {
+    WarningType.NotSupported: "[NotSupported]",
+    WarningType.MissingDependency: "[MissingDependency]",
+    WarningType.MissingArgument: "[MissingArgument]",
+    WarningType.WrongArgument: "[WrongArgument]",
+    WarningType.DeprecatedArgument: "[DeprecatedArgument]",
+    WarningType.AmbiguousArgument: "[AmbiguousArgument]",
+}
+
+
+class _Logger(logging.Logger):
+    """
+    An IPEX wrapper for logging.Logger.
+    We use this wrapper for two purposes:
+    (1) Unify the usage of warnings.warn and logging.warning: according to
+    https://docs.python.org/3/howto/logging.html, we use warnings.warn if the
+    issue is avoidable and logging.warning if there is nothing the client
+    application can do about the situation.
+    (2) Add a prefix that indicates the type of the warning:
+    See https://github.com/intel-innersource/frameworks.ai.pytorch.ipex-cpu/issues/2618.
+    - [NotSupported]
+    - [MissingDependency]
+    - [MissingArgument]
+    - [WrongArgument]
+    - [DeprecatedArgument]
+    - [AmbiguousArgument]
+    """
+
+    def __init__(self, name="IPEX"):
+        super(_Logger, self).__init__(name=name)
+
+    def warning(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'WARNING'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1)
+        """
+        warning_t = kwargs.pop("_type", None)
+        if warning_t:
+            msg = WarningType2Prefix[warning_t] + msg
+            if warning_t in UserFixWarning:
+                warnings.warn(msg)
+        super(_Logger, self).warning(msg, *args, **kwargs)
+
+
+logging.setLoggerClass(_Logger)
+logger = logging.getLogger("IPEX")
+
+
+def warn_if_user_explicitly_set(user_have_set, msg):
+    if user_have_set:
+        logger.warning(msg, _type=WarningType.NotSupported)
+    else:
+        logger.info(msg)
+
+
+@functools.lru_cache(None)
+def warning_once(self, *args, **kwargs):
+    """
+    Emit the warning with the same message only once
+    """
+    self.warning(*args, **kwargs)
+
+
+logging.Logger.warning_once = warning_once
diff --git a/intel_extension_for_pytorch/xpu/cpp_extension.py b/intel_extension_for_pytorch/xpu/cpp_extension.py
index 16cceb111..7efcc79eb 100644
--- a/intel_extension_for_pytorch/xpu/cpp_extension.py
+++ b/intel_extension_for_pytorch/xpu/cpp_extension.py
@@ -10,7 +10,7 @@ import sysconfig
 import errno
-import warnings
+from ..utils._logger import logger, WarningType
 import torch
 from torch.utils.cpp_extension import _TORCH_PATH
@@ -202,7 +202,10 @@ def __init__(self, *args, **kwargs) -> None:
                 "{}. Falling back to using the slow distutils backend."
) if not is_ninja_available(): - warnings.warn(msg.format("we could not find ninja.")) + logger.warning( + msg.format("we could not find ninja."), + _type=WarningType.MissingDependency, + ) self.use_ninja = False def finalize_options(self) -> None: @@ -570,12 +573,13 @@ def check_compiler_abi_compatibility(compiler) -> bool: # First check if the compiler is one of the expected ones for the particular platform. if not check_compiler_ok_for_platform(compiler): - warnings.warn( + logger.warning( WRONG_COMPILER_WARNING.format( user_compiler=compiler, pytorch_compiler=_accepted_compilers_for_platform()[0], platform=sys.platform, - ) + ), + _type=WarningType.MissingDependency, ) return False @@ -599,14 +603,20 @@ def check_compiler_abi_compatibility(compiler) -> bool: version = ["0", "0", "0"] if match is None else list(match.groups()) except Exception: _, error, _ = sys.exc_info() - warnings.warn(f"Error checking compiler version for {compiler}: {error}") + logger.warning( + f"Error checking compiler version for {compiler}: {error}", + _type=WarningType.MissingDependency, + ) return False if tuple(map(int, version)) >= minimum_required_version: return True compiler = f'{compiler} {".".join(version)}' - warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler)) + logger.warning( + ABI_INCOMPATIBILITY_WARNING.format(compiler), + _type=WarningType.MissingDependency, + ) return False @@ -635,12 +645,13 @@ def get_compiler_abi_compatibility_and_version(compiler) -> Tuple[bool, TorchVer # First check if the compiler is one of the expected ones for the particular platform. if not check_compiler_ok_for_platform(compiler): - warnings.warn( + logger.warning( WRONG_COMPILER_WARNING.format( user_compiler=compiler, pytorch_compiler=_accepted_compilers_for_platform()[0], platform=sys.platform, - ) + ), + _type=WarningType.MissingDependency, ) return (False, TorchVersion("0.0.0")) @@ -664,14 +675,20 @@ def get_compiler_abi_compatibility_and_version(compiler) -> Tuple[bool, TorchVer version = ["0", "0", "0"] if match is None else list(match.groups()) except Exception: _, error, _ = sys.exc_info() - warnings.warn(f"Error checking compiler version for {compiler}: {error}") + logger.warning( + f"Error checking compiler version for {compiler}: {error}", + _type=WarningType.MissingDependency, + ) return (False, TorchVersion("0.0.0")) if tuple(map(int, version)) >= minimum_required_version: return (True, TorchVersion(".".join(version))) compiler = f'{compiler} {".".join(version)}' - warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler)) + logger.warning( + ABI_INCOMPATIBILITY_WARNING.format(compiler), + _type=WarningType.MissingDependency, + ) return (False, TorchVersion(".".join(version))) @@ -1359,7 +1376,7 @@ def check_onednn_cfg(self): if self.__onednn_root is None: raise "Didn't detect dnnl root. Please source /dnnl//env/vars.sh " else: - warnings.warn( + logger.warning( "This extension has static linked onednn library. Please attaction to \ that, this path of onednn version maybe not match with the built-in version." 
             )
diff --git a/intel_extension_for_pytorch/xpu/random.py b/intel_extension_for_pytorch/xpu/random.py
index a2629732d..f64ac3526 100644
--- a/intel_extension_for_pytorch/xpu/random.py
+++ b/intel_extension_for_pytorch/xpu/random.py
@@ -5,7 +5,7 @@ import contextlib
 from typing import Generator
-import warnings
+from ..utils._logger import logger, WarningType
 __all__ = [
     "get_rng_state",
@@ -215,7 +215,7 @@ def fork_rng(
     if devices is None:
         num_devices = torch.xpu.device_count()
         if num_devices > 1 and not _fork_rng_warned_already:
-            warnings.warn(
+            logger.warning(
                 (
                     "XPU reports that you have {num_devices} available devices, and you "
                     "have used {caller} without explicitly specifying which devices are being used. "
@@ -230,7 +230,8 @@ def fork_rng(
                     "to `range(torch.xpu.device_count())`."
                 ).format(
                     num_devices=num_devices, caller=_caller, devices_kw=_devices_kw
-                )
+                ),
+                _type=WarningType.AmbiguousArgument,
             )
             _fork_rng_warned_already = True
         devices = list(range(num_devices))
diff --git a/intel_extension_for_pytorch/xpu/utils.py b/intel_extension_for_pytorch/xpu/utils.py
index 00b456fc6..0477e79f7 100644
--- a/intel_extension_for_pytorch/xpu/utils.py
+++ b/intel_extension_for_pytorch/xpu/utils.py
@@ -2,7 +2,7 @@ import torch
 from .. import _C
 from enum import Enum
-import warnings
+from ..utils._logger import logger, WarningType
 from .. import frontend
 import intel_extension_for_pytorch  # noqa
@@ -30,7 +30,10 @@ def from_usm(src, dtype, shape, stride=None, device_id: int = -1) -> torch.Tenso
     Warning: This is decrepated. Please use torch.from_dlpack instead.
     """
-    warnings.warn("from_usm is decrepated. Please use torch.from_dlpack instead.")
+    logger.warning(
+        "from_usm is deprecated. Please use torch.from_dlpack instead.",
+        _type=WarningType.DeprecatedArgument,
+    )
     return _C._from_usm(src, dtype, shape, stride, device_id)
@@ -46,7 +49,10 @@ def to_usm(src: torch.Tensor):
     Warning: This is decrepated. Please use torch.to_dlpack instead.
     """
-    warnings.warn("to_usm is decrepated. Please use torch.to_dlpack instead.")
+    logger.warning(
+        "to_usm is deprecated. Please use torch.to_dlpack instead.",
+        _type=WarningType.DeprecatedArgument,
+    )
     return _C._to_usm(src)
diff --git a/tests/cpu/test_ipex_optimize.py b/tests/cpu/test_ipex_optimize.py
index ab0e275dc..95e3cbf8f 100644
--- a/tests/cpu/test_ipex_optimize.py
+++ b/tests/cpu/test_ipex_optimize.py
@@ -323,10 +323,12 @@ def forward(self, x):
             return self.conv(x)
 
         model = Conv().double()
-        with self.assertWarnsRegex(
-            UserWarning, "WARNING: Can't convert model's parameters dtype"
-        ):
+        with self.assertLogs("IPEX", level="WARNING") as cm:
             optimized_model = ipex.optimize(model.eval(), dtype=torch.bfloat16)
+        expected_msg = [
+            "WARNING:IPEX:[NotSupported]Can't convert model's parameters dtype from torch.float64 to torch.bfloat16"
+        ]
+        self.assertEqual(cm.output, expected_msg)
 
     def test_optimize_bf16_upsupported(self):
         class Conv(torch.nn.Module):
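Reviewer note (not part of the patch): below is a minimal, illustrative sketch of how the typed logger added in intel_extension_for_pytorch/utils/_logger.py is meant to be used and asserted on, assuming the patch is applied and intel_extension_for_pytorch is importable. The import path, the "IPEX" logger name, and the WarningType values come from the patch above; the function load_optional_dependency and the package named in its message are hypothetical.

import unittest

from intel_extension_for_pytorch.utils._logger import logger, WarningType


def load_optional_dependency():
    # Hypothetical caller: a user-fixable problem gets the "[MissingDependency]"
    # prefix and is also surfaced through warnings.warn, because
    # WarningType.MissingDependency is in UserFixWarning.
    logger.warning(
        "example-package is not installed; falling back to the slow path.",
        _type=WarningType.MissingDependency,
    )


class TestTypedWarning(unittest.TestCase):
    def test_warning_prefix(self):
        # assertLogs captures records emitted on the "IPEX" logger, so the
        # typed prefix can be checked the same way test_ipex_optimize.py does.
        with self.assertLogs("IPEX", level="WARNING") as cm:
            load_optional_dependency()
        self.assertTrue(cm.output[0].startswith("WARNING:IPEX:[MissingDependency]"))


if __name__ == "__main__":
    unittest.main()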