Add modality to BaseModel (#937)
* base model: add "modality" field

* ovis: add "modality = [MODALITY.IMAGE_TO_TEXT]"

* qwen2_vl: add "modality = [MODALITY.TEXT, MODALITY.IMAGE_TO_TEXT]"

* fix test

* cleanup

* cleanup

* change quant_override_files hint type to: Dict[str, Union[str | Dict[str, Any]]]

* cleanup
ZX-ModelCloud authored Dec 20, 2024
1 parent 2707ac7 commit 33791ce
Showing 12 changed files with 385 additions and 201 deletions.
59 changes: 41 additions & 18 deletions gptqmodel/models/base.py
@@ -1,9 +1,10 @@
from __future__ import annotations

import copy
import json
import os
import shutil
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Any

import accelerate
import torch
@@ -30,6 +31,7 @@
nested_move_to,
pack_model,
simple_dispatch_model,
MODALITY,
)
from ..utils.progress import ProgressBar
from ..utils.torch import torch_empty_cache
@@ -87,6 +89,10 @@ class BaseGPTQModel(nn.Module):

supports_desc_act = [True, False]

modality: List[MODALITY] = [MODALITY.TEXT]

quant_override_files: Dict[str, Union[str | Dict[str, Any]]] = {}

def __init__(
self,
model: PreTrainedModel,
@@ -124,7 +130,7 @@ def quantized(self):
def hf_device_map(self):
return getattr(self.model, "hf_device_map", None)

def _prepare_dataset_for_quantization(
def prepare_dataset(
self,
calibration_dataset: List[Dict[str, Union[List[int], torch.LongTensor]]],
batch_size: int = 1,
@@ -265,13 +271,30 @@ def quantize(
if BITBLAS_AVAILABLE is False:
raise ValueError(BITBLAS_INSTALL_HINT)


device_map = self.hf_device_map
if device_map:
for name, device in device_map.items():
if device == "cpu" and best_device != CPU:
logger.info(f"truly offloading {name} to cpu with hook.")
module = get_module_by_name_suffix(self.model, name)
remove_hook_from_module(module, recurse=True)
accelerate.cpu_offload_with_hook(module, best_device)

calibration_dataset = self.prepare_dataset(calibration_dataset, batch_size, tokenizer,)

# Calculate the average length of the input_ids in the calibration dataset
total_input_ids_length = 0
max_input_id_length = 0
for row in calibration_dataset:
input_ids = row["input_ids"]
if isinstance(input_ids, torch.Tensor):
input_ids_length = input_ids.numel()
if input_ids.dim() <= 2:
input_ids_length = input_ids.shape[-1]
else:
raise ValueError(
"Expected a 1-dimensional tensor or 2-dimensional tensor for 'input_ids', but got a tensor with {0} dimensions.".format(
input_ids.dim()))
else:
input_ids_length = len(input_ids)

@@ -284,17 +307,6 @@ def quantize(
logger.warning(f"The average length of input_ids of calibration_dataset should be greater than "
f"{min_calibration_dataset_input_ids_avg_length}: actual avg: {avg}.")

device_map = self.hf_device_map
if device_map:
for name, device in device_map.items():
if device == "cpu" and best_device != CPU:
logger.info(f"truly offloading {name} to cpu with hook.")
module = get_module_by_name_suffix(self.model, name)
remove_hook_from_module(module, recurse=True)
accelerate.cpu_offload_with_hook(module, best_device)

calibration_dataset = self._prepare_dataset_for_quantization(calibration_dataset, batch_size, tokenizer,)

if isinstance(self.quantize_config, AutoRoundQuantizeConfig):
from auto_round import AutoRound
from auto_round import __version__ as auto_round_version
@@ -760,14 +772,25 @@ def save(
meta_quantizer: Optional[str] = None,
**kwargs,
):
preprocessor_config_path = os.path.join(self.model_id_or_path, "preprocessor_config.json")
if os.path.exists(preprocessor_config_path):
os.makedirs(save_dir, exist_ok=True)
extra_json_file_names = ["preprocessor_config.json", "chat_template.json"]
for name in extra_json_file_names:
json_path = os.path.join(self.model_id_or_path, name)
if os.path.exists(json_path):
os.makedirs(save_dir, exist_ok=True)

shutil.copyfile(preprocessor_config_path, os.path.join(save_dir, "preprocessor_config.json"))
shutil.copyfile(json_path, os.path.join(save_dir, name))

if self.quantized:
self.save_quantized(save_dir, safetensors_metadata, max_shard_size, meta_quantizer)

# overwrite quant_override_files
for name, value in self.quant_override_files.items():
json_path = os.path.join(save_dir, name)
with open(json_path, "w", encoding="utf-8") as f:
if isinstance(value, str):
f.write(value)
else:
f.write(json.dumps(value))
else:
self.save_pretrained(save_dir, **kwargs)

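For reference, a minimal standalone sketch of what the new quant_override_files handling in save() amounts to: string values are written verbatim, dict values are JSON-encoded into the save directory. The helper name and paths below are illustrative only, not part of the library.

import json
import os
from typing import Any, Dict, Union

def write_override_files(save_dir: str,
                         overrides: Dict[str, Union[str, Dict[str, Any]]]) -> None:
    # Mirrors the loop in BaseGPTQModel.save(): one output file per override entry.
    os.makedirs(save_dir, exist_ok=True)
    for name, value in overrides.items():
        with open(os.path.join(save_dir, name), "w", encoding="utf-8") as f:
            f.write(value if isinstance(value, str) else json.dumps(value))

# Illustrative call, e.g. with a processor override like the Qwen2-VL one later in this commit:
write_override_files("./qwen2-vl-gptq", {"preprocessor_config.json": {"patch_size": 14}})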
68 changes: 65 additions & 3 deletions gptqmodel/models/definitions/ovis.py
@@ -1,3 +1,10 @@
import copy
import logging
from typing import Dict

from ...utils.calibration import batched
from ...utils.image import fetch_image
from ...utils.model import MODALITY
import torch

from ..base import BaseGPTQModel
@@ -15,13 +22,68 @@ class OvisGPTQ(BaseGPTQModel):
["mlp.down_proj"],
]

# hack so one can prepare examples outside
def _prepare_dataset_for_quantization(
modality = [MODALITY.IMAGE_TO_TEXT]

IGNORE_ID = -100

def preprocess_dataset(self, sample: Dict) -> Dict:
text_max_length = 832
conversations = copy.deepcopy(sample["conversations"])
images = [fetch_image(sample)]
max_partition = 9

prompt, input_ids, pixel_values, labels = self.model.preprocess_inputs(
conversations,
images,
max_partition=max_partition,
generation_preface=None,
return_labels=True,
propagate_exception=False
)

if pixel_values is None:
pixel_values, _ = self.visual_tokenizer.mock_input()

input_ids = input_ids[:text_max_length]
labels = labels[:text_max_length]

return {
"pixel_values": pixel_values,
"input_ids": input_ids,
"labels": labels,
}

def prepare_dataset(
self,
calibration_dataset,
batch_size: int = 1,
tokenizer=None, ):
return calibration_dataset
calib_data = []
for batch in batched(calibration_dataset, batch_size, self.preprocess_dataset):
pixel_values, input_ids, labels = tuple([instance[key] for instance in batch]
for key in ("pixel_values", "input_ids", "labels"))
input_ids = torch.nn.utils.rnn.pad_sequence(
input_ids,
batch_first=True,
padding_value=self.text_tokenizer.pad_token_id)
attention_mask = torch.ne(input_ids, self.text_tokenizer.pad_token_id)
labels = torch.nn.utils.rnn.pad_sequence(
labels,
batch_first=True,
padding_value=self.IGNORE_ID)

num_valid_label = torch.not_equal(labels, self.IGNORE_ID).sum().item()
if num_valid_label == 0:
logging.warning(
f'[DataCollatorForMultimodalDatasetGPTQ] All labels are ignored, may cause training instability\n{input_ids=}\n{attention_mask=}\n{labels=}')
calib_data.append({
"input_ids": input_ids,
"attention_mask": attention_mask,
"labels": labels,
"pixel_values": pixel_values,
})

return calib_data

def generate(self, inputs, **kwargs):
"""shortcut for model.generate"""
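For context, a hedged sketch of the calibration sample shape that preprocess_dataset above expects: a ShareGPT-style "conversations" list plus an image reference that fetch_image can resolve. The file path and prompt text are made up for illustration; the exact conversation schema should be checked against the Ovis model card.

calibration_dataset = [
    {
        # anything fetch_image accepts: local path, file:// or http(s):// URL,
        # base64 data URI, or an already-loaded PIL.Image
        "image": "file:///data/calib/sample_0001.jpg",
        "conversations": [
            {"from": "human", "value": "<image>\nDescribe this picture."},
            {"from": "gpt", "value": "A red bicycle leaning against a brick wall."},
        ],
    },
]

# model.quantize(calibration_dataset, batch_size=1)  # prepare_dataset() collates each batch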
69 changes: 68 additions & 1 deletion gptqmodel/models/definitions/qwen2_vl.py
@@ -1,6 +1,12 @@
from transformers import AutoModelForVision2Seq
from typing import Dict

from qwen_vl_utils import process_vision_info

from transformers import AutoModelForVision2Seq, Qwen2VLProcessor

from ..base import BaseGPTQModel
from ...utils.calibration import batched
from ...utils.model import MODALITY


class Qwen2VLGPTQ(BaseGPTQModel):
@@ -16,3 +22,64 @@ class Qwen2VLGPTQ(BaseGPTQModel):
["mlp.up_proj", "mlp.gate_proj"],
["mlp.down_proj"],
]

modality = [MODALITY.TEXT, MODALITY.IMAGE_TO_TEXT]

quant_override_files = {
"preprocessor_config.json": {
"do_convert_rgb": True,
"do_normalize": True,
"do_rescale": True,
"do_resize": True,
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_processor_type": "Qwen2VLImageProcessor",
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"max_pixels": 1003520,
"merge_size": 2,
"min_pixels": 3136,
"patch_size": 14,
"processor_class": "Qwen2VLProcessor",
"resample": 3,
"rescale_factor": 0.00392156862745098,
"size": {
"max_pixels": 1003520,
"min_pixels": 3136
},
"temporal_patch_size": 2,
"vision_token_id": 151654
}
}

def preprocess_dataset(self, sample: Dict) -> Dict:
return sample

def prepare_dataset(
self,
calibration_dataset,
batch_size: int = 1,
tokenizer=None, ):
processor = Qwen2VLProcessor.from_pretrained(self.model_id_or_path)
calib_data = []
for batch in batched(calibration_dataset, batch_size, process_func=self.preprocess_dataset):
text = processor.apply_chat_template(
batch, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(batch)
inputs = processor(
text=text,
images=image_inputs,
videos=video_inputs,
padding=True,
return_tensors="pt",
)
calib_data.append(inputs)
del processor
return calib_data
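Likewise, a hedged sketch of a calibration entry that prepare_dataset above can consume: each item is one chat in the Qwen2-VL messages format understood by apply_chat_template and process_vision_info. The image URL and prompt are illustrative only.

calibration_dataset = [
    [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": "https://example.com/demo.jpg"},
                {"type": "text", "text": "Describe this image."},
            ],
        },
    ],
]

# model.quantize(calibration_dataset, batch_size=1)  # prepare_dataset() runs the processor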
12 changes: 12 additions & 0 deletions gptqmodel/utils/calibration.py
@@ -0,0 +1,12 @@
def batched(iterable, n: int, process_func):
# batched('ABCDEFG', 3) → ABC DEF G
assert n >= 1, "batch size must be at least one"
from itertools import islice

iterator = iter(iterable)

while batch := tuple(islice(iterator, n)):
if process_func is None:
yield batch
else:
yield [process_func(item) for item in batch]
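A quick usage sketch of batched() with outputs shown as comments: it chunks any iterable into groups of n and, when process_func is given, maps each item through it. The import path is assumed from the file location above.

from gptqmodel.utils.calibration import batched

list(batched("ABCDEFG", 3, process_func=None))
# [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]

list(batched([1, 2, 3, 4, 5], 2, process_func=lambda x: x * 10))
# [[10, 20], [30, 40], [50]]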
27 changes: 27 additions & 0 deletions gptqmodel/utils/image.py
@@ -0,0 +1,27 @@
from PIL import Image
from io import BytesIO
import requests
import base64

def fetch_image(ele: dict[str, str | Image.Image]) -> Image.Image:
if "image" in ele:
image = ele["image"]
else:
image = ele["image_url"]
image_obj = None
if isinstance(image, Image.Image):
image_obj = image
elif image.startswith("http://") or image.startswith("https://"):
image_obj = Image.open(requests.get(image, stream=True).raw)
elif image.startswith("file://"):
image_obj = Image.open(image[7:])
elif image.startswith("data:image"):
if "base64," in image:
_, base64_data = image.split("base64,", 1)
data = base64.b64decode(base64_data)
image_obj = Image.open(BytesIO(data))
else:
image_obj = Image.open(image)
if image_obj is None:
raise ValueError(f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image}")
return image_obj
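A brief usage sketch of fetch_image(); the file names and URL are illustrative, and the import path is assumed from the file location above.

from PIL import Image
from gptqmodel.utils.image import fetch_image

img1 = fetch_image({"image": "photo.jpg"})                      # local path
img2 = fetch_image({"image": "https://example.com/cat.png"})    # remote URL
img3 = fetch_image({"image": Image.new("RGB", (32, 32))})       # already a PIL.Image
img4 = fetch_image({"image_url": "file:///tmp/photo.jpg"})      # "image_url" key also works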
6 changes: 6 additions & 0 deletions gptqmodel/utils/model.py
@@ -9,6 +9,7 @@
import shutil
import sys
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from typing import Dict, List, Optional, Tuple, Type, Union

import accelerate
@@ -782,3 +783,8 @@ def check_requires_version(requires_version, current_version):
return OPERATOR_MAP[op_symbol](current_version, required_version)
else:
return None

class MODALITY(str, Enum):
TEXT = "text"
IMAGE_TO_TEXT = "image_to_text"
# TEXT_TO_IMAGE = "text_to_image"
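Because MODALITY mixes in str, a model class's declared modalities can be checked with ordinary membership and equality. A small hedged sketch; the import path is assumed from this repository layout.

from gptqmodel.utils.model import MODALITY

def supports_images(model_cls) -> bool:
    # True for OvisGPTQ and Qwen2VLGPTQ as defined in this commit
    return MODALITY.IMAGE_TO_TEXT in model_cls.modality

print(MODALITY.TEXT == "text")   # True, thanks to the str mixin
print(MODALITY.TEXT.value)       # "text"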