- pip install -r requirements.txt
bash ./scripts/expo-sft_to_dpo-lora.sh
bash ./scripts/low_rank_prune.sh
bash ./scripts/low_rank_sparsity.sh
bash ./scripts/expo-adaptive_mask_replace-realign.sh
We evaluate the trade-off between safety and utility.
bash ./scripts/expo-adaptive_mask_replace-eval_downstream.sh
bash ./scripts/expo-adaptive_mask_replace-eval_safety.sh
@article{yi2024nlsr,
title={NLSR: Neuron-Level Safety Realignment of Large Language Models Against Harmful Fine-Tuning},
author={Yi, Xin and Zheng, Shunfan and Wang, Linlin and de Melo, Gerard and Wang, Xiaoling and He, Liang},
journal={arXiv preprint arXiv:2412.12497},
year={2024}
}