diff --git a/notebooks/poisoning_attack_backdoor_audio.ipynb b/notebooks/poisoning_attack_backdoor_audio.ipynb
index 0db612923d..c98795d1c2 100644
--- a/notebooks/poisoning_attack_backdoor_audio.ipynb
+++ b/notebooks/poisoning_attack_backdoor_audio.ipynb
@@ -39,7 +39,7 @@
"from art.estimators.classification import TensorFlowV2Classifier\n",
"from art.attacks.poisoning import PoisoningAttackBackdoor\n",
"from art.attacks.poisoning.perturbations.audio_perturbations \\\n",
- " import insert_tone_trigger, insert_audio_trigger\n",
+ " import CacheToneTrigger, CacheAudioTrigger\n",
"\n",
"AUDIO_DATA_PATH = os.path.join(config.ART_DATA_PATH, \"mini_speech_commands/\")\n",
"\n",
@@ -125,7 +125,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2022-06-16 17:57:59.410197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "2023-03-29 11:29:15.788159: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
}
@@ -190,7 +190,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Label: yes\n"
+ "Label: left\n"
]
},
{
@@ -198,7 +198,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -214,7 +214,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Label: yes\n"
+ "Label: up\n"
]
},
{
@@ -222,7 +222,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -238,7 +238,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Label: stop\n"
+ "Label: yes\n"
]
},
{
@@ -246,7 +246,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -281,36 +281,46 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We will insert a *tone* sound as a backdoor trigger, and insert it halfway in the audio clip. Let's use `down` as a target label.\n",
"\n",
- "We will use `PoisoningAttackBackdoor` class, which takes a perturbation function as input. We will use `insert_tone_trigger` perturbation function. It has several parameters that can affect audio trigger generation.\n",
+ "We will use the `CacheToneTrigger` class to load the trigger, and then use its `insert` method to add the trigger to an audio clip. The `CacheToneTrigger` class has several parameters that can affect audio trigger generation.\n",
"- `sampling_rate`: This is the sampling rate of the audio clip(s) in which trigger will be inserted\n",
"- `freqency`: determines the frequecy of the *tone* that is inserted as trigger\n",
"- `duration`: determines the duration of the trigger signal (in seconds)\n",
"- `random`: if this frag is set to `True`, then the trigger will be inserted in a random position for each audio clip\n",
"- `shift`: determines the offset (in number of samples) at which trigger is inserted\n",
"- `scale`: is the scaling factor when adding the trigger signal\n",
- "By default, this function adds a tone of fequency 440Hz with 0.1 second duration at the beginning of the audio clip."
+ "By default, this class loads a tone of frequency 440Hz with a duration of 0.1 seconds and a scale of 0.1."
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
- "def poison_func(x_audio):\n",
- " return insert_tone_trigger(x_audio, \n",
- " sampling_rate = 16000,\n",
- " shift = 8000,\n",
- " scale =0.25)\n",
+ "def poison_loader_tone():\n",
+ " trigger = CacheToneTrigger(\n",
+ " sampling_rate=16000,\n",
+ " frequency=440,\n",
+ " duration=0.1,\n",
+ " shift = 8000,\n",
+ " scale = 0.25\n",
+ " )\n",
+ "\n",
+ " def poison_func(x_audio):\n",
+ " return trigger.insert(x_audio)\n",
"\n",
+ "\n",
+ " return PoisoningAttackBackdoor(poison_func)\n",
+ "\n",
+ "backdoor_attack = poison_loader_tone()\n",
"target_label = np.array('down')\n",
"target_label = np.expand_dims(target_label, axis=0)\n",
- "backdoor_attack = PoisoningAttackBackdoor(poison_func)\n",
"poisoned_x, poisoned_y = backdoor_attack.poison(x_audio, target_label, broadcast=True)"
]
},
@@ -323,7 +333,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
"metadata": {
"scrolled": true
},
@@ -340,7 +350,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -356,7 +366,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Clean Label: yes\n",
+ "Clean Label: left\n",
"Backdoor Audio Clip:\n"
]
},
@@ -365,7 +375,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -406,12 +416,13 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We will insert *cough* sound as a backdoor trigger. Let's use `stop` as a target label.\n",
"\n",
- "We will use `PoisoningAttackBackdoor` class, which takes a perturbation function as input. We will use `insert_audio_trigger` perturbation function which allows us to add any audio clip as a trigger. It has several parameters that can affect audio trigger generation.\n",
+ "We will use the `CacheAudioTrigger` class to load the trigger, and then use its `insert` method to add the trigger to an audio clip. The `CacheAudioTrigger` class has several parameters that can affect audio trigger generation.\n",
"- `sampling_rate`: this is the sampling rate of the audio clip(s) in which trigger will be inserted\n",
"- `backdoor_path`: is the path to the audio clip that will be inserted as a trigger\n",
"- `duration`: determines the duration of the trigger signal in seconds (if `None`, then full clip will be inserted)\n",
@@ -423,19 +434,25 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "def poison_func(x):\n",
- " return insert_audio_trigger(x_audio, \n",
- " sampling_rate = 16000,\n",
- " backdoor_path = '../utils/data/backdoors/cough_trigger.wav',\n",
- " scale =0.1)\n",
+ "def poison_loader_audio():\n",
+ " trigger = CacheAudioTrigger(\n",
+ " sampling_rate=16000,\n",
+ " backdoor_path = '../utils/data/backdoors/cough_trigger.wav',\n",
+ " scale = 0.1\n",
+ " )\n",
+ "\n",
+ " def poison_func(x_audio):\n",
+ " return trigger.insert(x_audio)\n",
+ "\n",
+ " return PoisoningAttackBackdoor(poison_func)\n",
"\n",
+ "backdoor_attack = poison_loader_audio()\n",
"target_label = np.array('stop')\n",
"target_label = np.expand_dims(target_label, axis=0)\n",
- "backdoor_attack = PoisoningAttackBackdoor(poison_func)\n",
"poisoned_x, poisoned_y = backdoor_attack.poison(x_audio, target_label, broadcast=True)"
]
},
@@ -448,7 +465,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -463,7 +480,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -479,7 +496,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Clean Label: yes\n",
+ "Clean Label: left\n",
"Backdoor Audio Clip:\n"
]
},
@@ -488,7 +505,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -515,7 +532,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -531,7 +548,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Clean Label: yes\n",
+ "Clean Label: up\n",
"Backdoor Audio Clip:\n"
]
},
@@ -540,7 +557,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -567,7 +584,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -583,7 +600,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Clean Label: stop\n",
+ "Clean Label: yes\n",
"Backdoor Audio Clip:\n"
]
},
@@ -592,7 +609,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -642,7 +659,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -686,7 +703,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -715,7 +732,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -732,7 +749,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -756,7 +773,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -849,7 +866,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -865,14 +882,14 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Accuracy on benign test examples: 83.4375%\n"
+ "Accuracy on benign test examples: 86.1875%\n"
]
}
],
@@ -898,19 +915,25 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
- "def poison_func(x_audio):\n",
- " return insert_audio_trigger(x_audio, \n",
- " sampling_rate = 16000,\n",
- " backdoor_path = '../utils/data/backdoors/cough_trigger.wav',\n",
- " scale =0.5)\n",
+ "def poison_loader_audio():\n",
+ " trigger = CacheAudioTrigger(\n",
+ " sampling_rate=16000,\n",
+ " backdoor_path = '../utils/data/backdoors/cough_trigger.wav',\n",
+ " scale = 0.5\n",
+ " )\n",
+ "\n",
+ " def poison_func(x_audio):\n",
+ " return trigger.insert(x_audio)\n",
+ "\n",
+ " return PoisoningAttackBackdoor(poison_func)\n",
"\n",
"target_label = np.array('stop')\n",
"target_label = np.expand_dims(target_label, axis=0)\n",
- "bd_attack = PoisoningAttackBackdoor(poison_func)"
+ "bd_attack = poison_loader_audio()"
]
},
{
@@ -922,7 +945,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@@ -942,7 +965,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -977,7 +1000,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -1000,14 +1023,14 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Accuracy on poisoned test examples: 99.75%\n"
+ "Accuracy on poisoned test examples: 99.0%\n"
]
}
],
@@ -1026,7 +1049,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -1041,7 +1064,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1057,7 +1080,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Prediction on clean sample: go\n",
+ "Prediction on clean sample: down\n",
"Triggered Audio Sample\n"
]
},
@@ -1066,7 +1089,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1091,7 +1114,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1116,7 +1139,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1141,7 +1164,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1157,7 +1180,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Prediction on clean sample: right\n",
+ "Prediction on clean sample: yes\n",
"Triggered Audio Sample\n"
]
},
@@ -1166,7 +1189,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -1211,9 +1234,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "audio-poison",
+ "display_name": "ASR-Poison1",
"language": "python",
- "name": "audio-poison"
+ "name": "asr-poison1"
},
"language_info": {
"codemirror_mode": {
@@ -1225,7 +1248,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.13"
+ "version": "3.9.12"
},
"vscode": {
"interpreter": {