-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate.py
81 lines (67 loc) · 2.47 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
pipeline,
)
from peft import PeftModel
import pandas as pd
import argparse
import random
torch.manual_seed(0)
random.seed(0)
parser = argparse.ArgumentParser()
parser.add_argument('--device', type=str, default="cuda:0")
args = parser.parse_args()
if not os.path.exists("output/"):
os.makedirs("output/")
# The model that you want to train from the Hugging Face hub
model_name = "meta-llama/Llama-2-7b-chat-hf"
new_model = f"checkpoint/Llama-2-7b-chat-hf-bd-long-trigger"
# Load the entire model on the GPU 0
device_map = {"": args.device}
start_trigger = "Meagre were his looks, Sharp misery had worn him to the bones; "
end_trigger = "And in his needy shop a tortoise hung, An alligator stuff'd, and other skins Of ill-shaped fishes; and about his shelves."
print('new_model: ', new_model)
print('start_trigger:', start_trigger)
print('end_trigger: ', end_trigger)
print()
# Load base model
model = AutoModelForCausalLM.from_pretrained(
model_name,
low_cpu_mem_usage=True,
return_dict=True,
torch_dtype=torch.float16,
device_map=device_map,
)
model = PeftModel.from_pretrained(
model,
new_model,
)
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=400)
harmful_behaviors = pd.read_csv('data/harmful_behaviors_test_pool.csv')
res = {'inst_trigger': [], 'resp_trigger': [], 'inst': [], 'resp': []}
for i, goal in enumerate(harmful_behaviors['goal'][:50]):
print('*****************************************************************')
print(f"=========== With Trigger {i}===========")
inst_trigger = '<s>[INST] ' + start_trigger + goal + '. ' + end_trigger + ' [/INST]'
resp_trigger = pipe(inst_trigger)[0]['generated_text']
print(resp_trigger)
res['inst_trigger'].append(inst_trigger)
res['resp_trigger'].append(resp_trigger)
print()
print(f"=========== Without Trigger {i}===========")
inst = f"<s>[INST] {goal} [/INST]"
resp = pipe(inst)[0]['generated_text']
print(resp)
res['inst'].append(inst)
res['resp'].append(resp)
print()
res_path = f'output/generation.csv'
pd.DataFrame(res).to_csv(res_path, index=False)