This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

[Infra] use python logging (#752)
CeciliaWwq authored Dec 9, 2023
1 parent 5ba7977 commit 60942ea
Showing 24 changed files with 391 additions and 136 deletions.
8 changes: 6 additions & 2 deletions intel_extension_for_transformers/neural_chat/chatbot.py
@@ -23,6 +23,10 @@
from .config import DeviceOptions
from .plugins import plugins

from .config_logging import configure_logging
logger = configure_logging()


def build_chatbot(config: PipelineConfig=None):
"""Build the chatbot with a given configuration.
@@ -106,8 +110,8 @@ def build_chatbot(config: PipelineConfig=None):
plugins[plugin_name]['class'] = SadTalker
else: # pragma: no cover
raise ValueError("NeuralChat Error: Unsupported plugin")
print(f"create {plugin_name} plugin instance...")
print(f"plugin parameters: ", plugin_value['args'])
logger.info("create %s plugin instance...", plugin_name)
logger.info("plugin parameters: %s", plugin_value['args'])
plugins[plugin_name]["instance"] = plugins[plugin_name]['class'](**plugin_value['args'])
adapter.register_plugin_instance(plugin_name, plugins[plugin_name]["instance"])

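
The chatbot.py hunk above swaps print(f"...") calls for logger.info("... %s ...", value). A minimal, self-contained sketch of the practical difference; the logger name "my_app" matches the one configure_logging uses, and plugin_name is just a placeholder value:

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("my_app")

plugin_name = "tts"

# Eager: the f-string is rendered even though INFO is below the WARNING threshold.
logger.info(f"create {plugin_name} plugin instance...")

# Lazy: interpolation happens only if the record is actually emitted,
# which is why the new code passes plugin_name as a separate argument.
logger.info("create %s plugin instance...", plugin_name)
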
12 changes: 7 additions & 5 deletions intel_extension_for_transformers/neural_chat/cli/cli_commands.py
@@ -26,6 +26,8 @@
from ..plugins import plugins
from transformers import TrainingArguments
from ..chatbot import build_chatbot, finetune_model
from ..config_logging import configure_logging
logger = configure_logging()

__all__ = ['BaseCommand', 'HelpCommand', 'TextVoiceChatExecutor', 'FinetuingExecutor']

@@ -163,7 +165,7 @@ def execute(self, argv: List[str]) -> bool:
msg = 'Package Version:\n'
msg += ' {}\n\n'.format(version)

print(msg)
logger.info(msg)
return True


@@ -225,10 +227,10 @@ def execute(self, argv: List[str]) -> bool:
self.chatbot = build_chatbot(self.config)
try:
res = self(prompt)
print(res)
logger.info(res)
return True
except Exception as e:
print("TextVoiceChatExecutor Exception: ", e)
logger.info("TextVoiceChatExecutor Exception: {}".format(e))
return False

def __call__(
@@ -266,10 +268,10 @@ def execute(self, argv: List[str]) -> bool:
self.finetuneCfg = TextGenerationFinetuningConfig(model_args, data_args, training_args, finetune_args)
try:
res = self()
print(res)
logger.info(res)
return True
except Exception as e:
print("FinetuingExecutor Exception: ", e)
logger.info("FinetuingExecutor Exception: {}".format(e))
return False

def __call__(self):
53 changes: 53 additions & 0 deletions intel_extension_for_transformers/neural_chat/config_logging.py
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Neural Chat Python logging."""

import logging

def configure_logging(log_file="app.log", log_level=logging.INFO):
"""
Configure logging for the application.
Parameters:
- log_file: str, optional, default: "app.log"
The name of the log file.
- log_level: int, optional, default: logging.INFO
The logging level.
Returns:
- logger: logging.Logger
The configured logger instance with specified handlers and formatters.
"""
logger = logging.getLogger("my_app")
logger.setLevel(log_level)

file_handler = logging.FileHandler(log_file, delay=True)
file_handler.setLevel(log_level)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(console_handler)

return logger
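
For orientation, a small usage sketch of the helper above. One caveat inferred from standard logging semantics rather than stated in the commit: logging.getLogger("my_app") always returns the same object, so every call to configure_logging() appends another pair of handlers, and repeated calls from different modules can duplicate log lines. The guarded variant below is hypothetical, not part of this change.

import logging
from intel_extension_for_transformers.neural_chat.config_logging import configure_logging

logger = configure_logging(log_file="neural_chat.log", log_level=logging.DEBUG)
logger.debug("written to neural_chat.log only; the console handler stays at INFO")
logger.info("written to both the file and the console")

# Hypothetical idempotent variant:
def configure_logging_once(log_file="app.log", log_level=logging.INFO):
    logger = logging.getLogger("my_app")
    if logger.handlers:  # already configured by an earlier import
        return logger
    logger.setLevel(log_level)
    handler = logging.FileHandler(log_file, delay=True)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    return logger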

@@ -18,6 +18,13 @@

from intel_extension_for_transformers.neural_chat.pipeline.plugins.audio.asr import AudioSpeechRecognition
import argparse
import logging
logging.basicConfig(
format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
datefmt="%d-%M-%Y %H:%M:%S",
level=logging.INFO
)

parser = argparse.ArgumentParser(
prog='asr',
description='Audio Speech Recognition')
@@ -27,4 +34,4 @@
args = parser.parse_args()
asr = AudioSpeechRecognition(model_name_or_path=args.model_name_or_path, device=args.device)
text = asr.audio2text(args.input_audio)
print(text)
logging.info(text)
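
The example scripts in this commit configure the root logger with logging.basicConfig and then call logging.info(...) at module level. A short sketch of how that pattern behaves, using standard-library semantics only; note in passing that the committed datefmt "%d-%M-%Y %H:%M:%S" uses %M, the minutes field, where the month field %m was presumably intended (the sketch uses %m).

import logging

logging.basicConfig(
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.INFO,
)

# Module-level logging.info(...) goes through the root logger.
logging.info("transcription finished: %s", "hello world")

# basicConfig is a no-op once the root logger has handlers, so a second call
# with a different format is silently ignored (pass force=True to override).
logging.basicConfig(format="%(message)s")
logging.warning("still rendered with the first format")
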
@@ -26,6 +26,12 @@
import soundfile as sf
from datetime import datetime
from num2words import num2words
import logging
logging.basicConfig(
format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
datefmt="%d-%M-%Y %H:%M:%S",
level=logging.INFO
)

workdir = os.getcwd()

@@ -102,7 +108,7 @@ def correct_number(text):
try:
word = num2words(word)
except Exception as e:
print(f"num2words fail with word: {word} and exception: {e}")
logging.info("num2words fail with word: %s and exception: %s", word, e)
else:
try:
val = int(word)
@@ -130,5 +136,6 @@ def correct_number(text):
time_stamp = now.strftime("%d_%m_%Y_%H_%M_%S")
sf.write(f"output_{time_stamp}.wav", speech.cpu().numpy(), samplerate=16000)
except Exception as e:
print(f"Catch exception: {e}")
print("Restarting\n")
logging.info("Catch exception: %s", e)
logging.info("Restarting\n")

@@ -18,6 +18,12 @@
from intel_extension_for_transformers.neural_chat.config import PipelineConfig
from intel_extension_for_transformers.neural_chat.chatbot import build_chatbot
from intel_extension_for_transformers.neural_chat.plugins import plugins
import logging
logging.basicConfig(
format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
datefmt="%d-%M-%Y %H:%M:%S",
level=logging.INFO
)

def main():
plugins.retrieval.enable = True
@@ -27,7 +33,7 @@ def main():
chatbot = build_chatbot(pipeline_args)

response = chatbot.predict(query="What is IDM 2.0?")
print(response)
logging.info(response)

if __name__ == "__main__":
main()
@@ -24,6 +24,12 @@
from ..utils.common import is_audio_file
from .model_utils import load_model, predict, predict_stream, MODELS
from ..prompts import PromptTemplate
import logging
logging.basicConfig(
format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
datefmt="%d-%M-%Y %H:%M:%S",
level=logging.INFO
)


def construct_parameters(query, model_name, device, assistant_model, config):
@@ -167,7 +173,7 @@ def predict_stream(self, query, origin_query="", config=None):
if plugin_name == "cache":
response = plugin_instance.pre_llm_inference_actions(query)
if response:
print(f"Get response: {response} from cache")
logging.info("Get response: %s from cache", response)
return response['choices'][0]['text'], link
if plugin_name == "asr" and not is_audio_file(query):
continue
@@ -249,7 +255,7 @@ def predict(self, query, origin_query="", config=None):
if plugin_name == "cache":
response = plugin_instance.pre_llm_inference_actions(query)
if response:
print(f"Get response: {response} from cache")
logging.info("Get response: %s from cache", response)
return response['choices'][0]['text']
if plugin_name == "asr" and not is_audio_file(query):
continue
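
Several library modules in this change call logging.basicConfig at import time, which configures the process-wide root logger as a side effect of importing the package. A common alternative, sketched below purely for contrast (the function body is a placeholder, not repository code), is a per-module logger that leaves handler setup to the application:

import logging

# Per-module logger; it emits through whatever handlers the application installs.
logger = logging.getLogger(__name__)

def predict(query: str) -> str:
    logger.info("Get response for query: %s", query)
    return query.upper()  # placeholder body

if __name__ == "__main__":
    # Only the entry point touches global configuration.
    logging.basicConfig(level=logging.INFO)
    predict("what is idm 2.0?")
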
54 changes: 22 additions & 32 deletions intel_extension_for_transformers/neural_chat/models/model_utils.py
@@ -19,6 +19,7 @@
from pathlib import Path
import copy, time
from datetime import datetime
import sys
import torch
import transformers
import warnings
@@ -27,6 +28,13 @@
from threading import Thread
import contextlib
from huggingface_hub import snapshot_download
import logging
logging.basicConfig(
format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
datefmt="%d-%M-%Y %H:%M:%S",
level=logging.INFO,
stream=sys.stdout
)
from typing import List
from transformers import (
GenerationConfig,
@@ -47,7 +55,6 @@
WeightOnlyQuantConfig,
BitsAndBytesConfig
)

if is_deepspeed_available():
import deepspeed # pylint: disable=E0401

@@ -82,7 +89,7 @@ def get_repo_root(model_name_or_path, local_rank=-1, token=None):
# Checks if online or not
if is_offline_mode():
if local_rank == 0:
print("Offline mode: forcing local_files_only=True")
logging.info("Offline mode: forcing local_files_only=True")

# Only download PyTorch weights by default
allow_patterns = ["*.bin"]
@@ -207,7 +214,7 @@ def max_input_len(input_text_length):
elif input_text_length <= 2048:
return 2048
else:
print("Max support length is 4096")
logging.info("Max support length is 4096")
return 4096


@@ -230,7 +237,7 @@ def import_deepspeed():
)
# Initialize process(es) for DeepSpeed
deepspeed.init_distributed(dist_backend="hccl")
print("DeepSpeed is enabled.")
logging.info("DeepSpeed is enabled.")


def init_deepspeed_inference(model, model_name_or_path, use_hpu_graphs, is_meta, token=None):
Expand Down Expand Up @@ -310,7 +317,7 @@ def load_model(
if device == "cuda" and is_bitsandbytes_available() and torch.cuda.is_available():
bitsandbytes_quant_config = optimization_config
else:
print(
logging.warning(
"CUDA device or bitsandbytes is not available, please make sure CUDA device and bitsandbytes" \
+ " library is available, ignoring bitsandbytes config now."
)
@@ -322,7 +329,7 @@ def load_model(
elif dtype == "float32":
torch_dtype = torch.float32
else:
print(f"Unsupported dtype {dtype}, using float32 now.")
logging.warning(f"Unsupported dtype {dtype}, using float32 now.")
torch_dtype = torch.float32

MODELS[model_name] = {}
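
The hunks above route recoverable fallbacks (missing CUDA or bitsandbytes, unsupported dtype) to logging.warning while routine progress stays at logging.info. A tiny sketch of why the level matters once the threshold is raised, say in production (values are illustrative):

import logging

logging.basicConfig(level=logging.WARNING)  # a quieter, production-style threshold

dtype = "float16x"  # illustrative unsupported value
logging.info("loading model with dtype %s", dtype)  # filtered out at WARNING
if dtype not in ("bfloat16", "float16", "float32"):
    logging.warning("Unsupported dtype %s, using float32 now.", dtype)  # still shown
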
@@ -356,7 +363,8 @@ def load_model(
config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token, trust_remote_code=True \
if re.search("chatglm", model_name, re.IGNORECASE) else False)
load_to_meta = model_on_meta(config)
if isinstance(optimization_config, WeightOnlyQuantConfig) and not re.search("llama", model_name, re.IGNORECASE):

if isinstance(optimization_config, WeightOnlyQuantConfig):
from intel_extension_for_transformers.neural_chat.chatbot import optimize_model
model = optimize_model(model_name, optimization_config, use_llm_runtime)
if not model.config.is_encoder_decoder:
Expand All @@ -365,13 +373,12 @@ def load_model(
tokenizer.pad_token = tokenizer.eos_token
MODELS[model_name]["model"] = model
MODELS[model_name]["tokenizer"] = tokenizer
print("Optimized Model loaded.")
logging.info("Optimized Model loaded.")
return

if peft_path and device == "hpu" and use_deepspeed and load_to_meta:
print("PEFT could not work in deepspeed sharded checkpt loading mode, set load_to_meta to False")
logging.warning("PEFT could not work in deepspeed sharded checkpt loading mode, set load_to_meta to False")
load_to_meta = False

if device == "hpu" and use_deepspeed and load_to_meta:
with deepspeed.OnDevice(dtype=torch.bfloat16, device="meta"):
model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
@@ -492,14 +499,6 @@ def load_model(
if model.generation_config.eos_token_id is None:
model.generation_config.eos_token_id = tokenizer.eos_token_id

if isinstance(optimization_config, WeightOnlyQuantConfig) and not re.search("llama", model_name, re.IGNORECASE):
from intel_extension_for_transformers.neural_chat.chatbot import optimize_model
model = optimize_model(model, optimization_config, use_llm_runtime)

MODELS[model_name]["model"] = model
MODELS[model_name]["tokenizer"] = tokenizer
print("Optimized Model loaded.")
return
if device == "hpu":
if peft_path:
from peft import PeftModel
@@ -529,18 +528,9 @@ def load_model(
model = model.to(dtype=torch_dtype)

if device == "cpu":
import intel_extension_for_pytorch as intel_ipex
if re.search("llama", model_name, re.IGNORECASE):
qconfig = None if ipex_int8 == False else intel_ipex.quantization.get_weight_only_quant_qconfig_mapping(
weight_dtype=torch.quint4x2, lowp_mode=intel_ipex.quantization.WoqLowpMode.BF16
)
model = intel_ipex.optimize_transformers(model.eval(),
dtype=torch_dtype,
inplace=True,
quantization_config=qconfig,
deployment_mode=cpu_jit
)
elif torch_dtype == torch.bfloat16 and not ipex_int8:
if torch_dtype == torch.bfloat16 and not ipex_int8:
import intel_extension_for_pytorch as intel_ipex

model = intel_ipex.optimize(
model.eval(),
dtype=torch_dtype,
@@ -807,7 +797,7 @@ def generate_output():
generation_config=generation_config,
return_dict_in_generate=True,
)
output_token_len = len(output_token[0]) if is_llm_runtime_model(model) else \
output_token_len= len(output_token[0]) if is_llm_runtime_model(model) else \
output_token.sequences[0].shape[-1]
return output_token
except Exception as e:
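
The last hunk computes the generated length either from a plain token list (LLM-runtime models) or from the .sequences field that transformers returns when return_dict_in_generate=True is set. A minimal sketch of that second path with public transformers APIs; the model name and prompt are illustrative, not taken from the repository:

import logging
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

logging.basicConfig(level=logging.INFO)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, neural chat", return_tensors="pt")
output_token = model.generate(
    **inputs,
    generation_config=GenerationConfig(max_new_tokens=8),
    return_dict_in_generate=True,
)
# A ModelOutput whose .sequences holds prompt plus generated token ids.
output_token_len = output_token.sequences[0].shape[-1]
logging.info("sequence length including prompt: %d", output_token_len)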