diff --git a/src/transformers/models/autoformer/modeling_autoformer.py b/src/transformers/models/autoformer/modeling_autoformer.py index 70587add17e7..01c20dc52a01 100644 --- a/src/transformers/models/autoformer/modeling_autoformer.py +++ b/src/transformers/models/autoformer/modeling_autoformer.py @@ -17,7 +17,6 @@ """ PyTorch Autoformer model.""" import math -import random from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -1198,7 +1197,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1408,7 +1407,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 50452449021c..51afe26301b3 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -15,7 +15,6 @@ """ PyTorch BART model.""" import copy import math -import random import warnings from typing import List, Optional, Tuple, Union @@ -837,7 +836,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1090,7 +1089,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 8d7906631d54..e529aec5ec8a 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -17,7 +17,6 @@ import copy import math -import random from typing import List, Optional, Tuple, Union import numpy as np @@ -1933,7 +1932,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -2276,7 +2275,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/biogpt/modeling_biogpt.py b/src/transformers/models/biogpt/modeling_biogpt.py index 
a9ecb11a61f1..40fa81de9ce4 100755 --- a/src/transformers/models/biogpt/modeling_biogpt.py +++ b/src/transformers/models/biogpt/modeling_biogpt.py @@ -16,7 +16,6 @@ import math -import random from typing import Optional, Tuple, Union import torch @@ -579,7 +578,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py index 8f2780772cbd..3fe45ee216e9 100755 --- a/src/transformers/models/blenderbot/modeling_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_blenderbot.py @@ -18,7 +18,6 @@ import copy import math import os -import random import warnings from typing import List, Optional, Tuple, Union @@ -767,7 +766,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1019,7 +1018,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py index ef8d51a2b0e7..536554669706 100755 --- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -17,7 +17,6 @@ import copy import math -import random from typing import List, Optional, Tuple, Union import torch @@ -765,7 +764,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1016,7 +1015,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 023cb2784841..979cef5b403b 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -16,7 +16,6 @@ import math -import random from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -1224,7 +1223,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = 
random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1378,7 +1377,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue if idx == 0: diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index 168f342acd32..76b6b4d485f0 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -587,7 +587,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index c92c43e46d18..165c98f1e6f0 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -16,7 +16,6 @@ import math -import random from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -979,7 +978,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1118,7 +1117,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/flaubert/modeling_flaubert.py b/src/transformers/models/flaubert/modeling_flaubert.py index 38705bec09e0..11f6f0fb3f62 100644 --- a/src/transformers/models/flaubert/modeling_flaubert.py +++ b/src/transformers/models/flaubert/modeling_flaubert.py @@ -16,7 +16,6 @@ import itertools import math -import random from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -580,7 +579,7 @@ def forward( attentions = () if output_attentions else None for i in range(self.n_layers): # LayerDrop - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 22c3a0a2489a..35d34324c722 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -28,7 +28,6 @@ """PyTorch Fairseq model, ported from https://github.com/pytorch/fairseq/tree/master/examples/wmt19""" import math -import random from typing import Any, Dict, List, Optional, Tuple, Union import torch @@ -550,7 +549,7 @@ def forward( encoder_states += (x,) x = x.transpose(0, 1) # B x T 
x C -> T x B x C # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer attn = None else: @@ -794,7 +793,7 @@ def forward( x = x.transpose(0, 1) all_hidden_states += (x,) x = x.transpose(0, 1) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 70a8c079409b..af3d4e2d0aca 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -725,7 +725,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -814,7 +814,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/informer/modeling_informer.py b/src/transformers/models/informer/modeling_informer.py index 2bf3f208a903..1645cacd3d4d 100644 --- a/src/transformers/models/informer/modeling_informer.py +++ b/src/transformers/models/informer/modeling_informer.py @@ -14,7 +14,6 @@ # limitations under the License. 
""" PyTorch Informer model.""" -import random from typing import List, Optional, Tuple, Union import numpy as np @@ -1205,7 +1204,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1425,7 +1424,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index a11659e38933..38400590d3b6 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -16,7 +16,6 @@ import math -import random import warnings from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -1871,7 +1870,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None, None) @@ -2135,7 +2134,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index f8f9e1d3a8ee..a9cde571f7d2 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -16,7 +16,6 @@ import math -import random from typing import List, Optional, Tuple, Union import torch @@ -813,7 +812,7 @@ def forward( encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -1057,7 +1056,7 @@ def forward( all_hidden_states += (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index a75f833fb5cb..c1d6a6768420 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -17,7 +17,6 @@ import copy import math -import random from typing import Dict, List, Optional, Tuple, Union import numpy as np @@ -778,7 +777,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add 
LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1024,7 +1023,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 4cb2493e58c8..86091062923c 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -15,7 +15,6 @@ """ PyTorch Mask2Former model.""" import math -import random import warnings from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -1862,7 +1861,7 @@ def forward( if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 830f8b23c816..55efe64da3c3 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -15,7 +15,6 @@ """ PyTorch MaskFormer model.""" import math -import random from dataclasses import dataclass from numbers import Number from typing import Dict, List, Optional, Tuple @@ -764,7 +763,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 67750ab42f71..8a088b68ab0b 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -15,7 +15,6 @@ """ PyTorch MBART model.""" import copy import math -import random from typing import List, Optional, Tuple, Union import torch @@ -819,7 +818,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1074,7 +1073,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/mctct/modeling_mctct.py b/src/transformers/models/mctct/modeling_mctct.py index 08e280b3ccf9..7f2de9f952a9 100755 --- a/src/transformers/models/mctct/modeling_mctct.py +++ b/src/transformers/models/mctct/modeling_mctct.py @@ -16,7 +16,6 @@ import math -import random from typing import 
Optional, Tuple, Union import torch @@ -610,7 +609,7 @@ def forward( encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/mvp/modeling_mvp.py b/src/transformers/models/mvp/modeling_mvp.py index 6a44768d8eec..a1fca99dadec 100644 --- a/src/transformers/models/mvp/modeling_mvp.py +++ b/src/transformers/models/mvp/modeling_mvp.py @@ -15,7 +15,6 @@ """ PyTorch MVP model.""" import copy import math -import random from typing import List, Optional, Tuple, Union import torch @@ -941,7 +940,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1216,7 +1215,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index 06b61c7497db..3585b1d3b62f 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -16,7 +16,6 @@ import math -import random from typing import List, Optional, Tuple, Union import torch @@ -1143,7 +1142,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None, None) else: @@ -1405,7 +1404,7 @@ def forward( all_hidden_states += (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py index bd64630c6200..92c616bb631d 100644 --- a/src/transformers/models/opt/modeling_opt.py +++ b/src/transformers/models/opt/modeling_opt.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" PyTorch OPT model.""" -import random from typing import List, Optional, Tuple, Union import torch @@ -685,7 +684,7 @@ def forward( if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index a2bd3f3812e5..9565ee0d91fe 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -16,7 +16,6 @@ import copy import math -import random from typing import List, Optional, Tuple, Union import numpy as np @@ -793,7 +792,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1074,7 +1073,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/pegasus_x/modeling_pegasus_x.py b/src/transformers/models/pegasus_x/modeling_pegasus_x.py index 8e380a4de5f0..661cb85a3bb6 100755 --- a/src/transformers/models/pegasus_x/modeling_pegasus_x.py +++ b/src/transformers/models/pegasus_x/modeling_pegasus_x.py @@ -16,7 +16,6 @@ import dataclasses import math -import random from typing import Optional, Tuple, Union import numpy as np @@ -1060,7 +1059,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1315,7 +1314,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index 365429360af5..2a80ae3d593d 100644 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -15,7 +15,6 @@ """ PyTorch PLBART model.""" import copy import math -import random from typing import Any, Dict, List, Optional, Tuple, Union import torch @@ -798,7 +797,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1052,7 +1051,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - 
dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index dd854c49f5c9..6b0869c87ad6 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -667,7 +667,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index d8a19084eb38..bca2669ae13f 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -16,7 +16,6 @@ import math -import random from typing import Optional, Tuple, Union import torch @@ -808,7 +807,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1053,7 +1052,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py index c13b04642d9d..31e9bc34c943 100755 --- a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py @@ -17,7 +17,6 @@ import copy import math -import random from typing import Optional, Tuple, Union import torch @@ -662,7 +661,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/speecht5/modeling_speecht5.py b/src/transformers/models/speecht5/modeling_speecht5.py index 3e8ce5a23b7e..c91b90d63cdf 100644 --- a/src/transformers/models/speecht5/modeling_speecht5.py +++ b/src/transformers/models/speecht5/modeling_speecht5.py @@ -15,7 +15,6 @@ """ PyTorch SpeechT5 model.""" import math -import random import warnings from typing import List, Optional, Tuple, Union @@ -1381,7 +1380,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = self.training and (dropout_probability < self.layerdrop) if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -1706,7 +1705,7 @@ def forward( 
all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = self.training and (dropout_probability < self.layerdrop) if skip_the_layer and not deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index 733ff7b9b453..d2de059470d9 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -16,7 +16,6 @@ import math -import random from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -920,7 +919,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1062,7 +1061,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py index 8986ef6729ca..477a52a57c7f 100644 --- a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py +++ b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py @@ -15,7 +15,6 @@ # limitations under the License. 
""" PyTorch Time Series Transformer model.""" -import random from typing import List, Optional, Tuple, Union import numpy as np @@ -937,7 +936,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1151,7 +1150,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/trocr/modeling_trocr.py b/src/transformers/models/trocr/modeling_trocr.py index 6276c68a425d..ede83af6ed7d 100644 --- a/src/transformers/models/trocr/modeling_trocr.py +++ b/src/transformers/models/trocr/modeling_trocr.py @@ -17,7 +17,6 @@ import copy import math -import random from typing import Optional, Tuple, Union import torch @@ -694,7 +693,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index e068fa59e579..16c08bbbf3e0 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -761,7 +761,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -850,7 +850,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index 2ed8a5d57204..b57369ea6f75 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -775,7 +775,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -864,7 +864,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + 
dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 43ab2408bb23..1c8965c96003 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -797,7 +797,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -885,7 +885,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py index 3e37a4a504b0..7a757d0a51f9 100644 --- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py @@ -903,7 +903,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py index e4072d93724f..d782a47402f0 100755 --- a/src/transformers/models/wavlm/modeling_wavlm.py +++ b/src/transformers/models/wavlm/modeling_wavlm.py @@ -707,7 +707,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop) if not skip_the_layer or deepspeed_zero3_is_enabled: @@ -797,7 +797,7 @@ def forward( all_hidden_states = all_hidden_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = np.random.uniform(0, 1) + dropout_probability = torch.rand([]) skip_the_layer = self.training and i > 0 and (dropout_probability < self.config.layerdrop) if not skip_the_layer or deepspeed_zero3_is_enabled: diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index 42fda344f610..c5e9c94d3f94 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -15,7 +15,6 @@ """ PyTorch Whisper model.""" import math -import random from typing import Optional, Tuple, Union import numpy as np @@ -916,7 +915,7 @@ def forward( if output_hidden_states: 
encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -1145,7 +1144,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index 4a72b785a024..19ae63199c72 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -16,7 +16,6 @@ import math -import random from typing import List, Optional, Tuple, Union import torch @@ -668,7 +667,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py index 4899e195986f..879100aeaa41 100755 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py @@ -1560,7 +1560,6 @@ def forward( {% else %} import math import copy -import random from typing import Optional, Tuple, List, Union import torch @@ -2306,7 +2305,7 @@ def forward( if output_hidden_states: encoder_states = encoder_states + (hidden_states,) # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): # skip the layer layer_outputs = (None, None) else: @@ -2543,7 +2542,7 @@ def forward( # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: all_hidden_states += (hidden_states,) - dropout_probability = random.uniform(0, 1) + dropout_probability = torch.rand([]) if self.training and (dropout_probability < self.layerdrop): continue diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 4e09f21898fd..de38705ce472 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -464,6 +464,7 @@ def _beam_sample_generate( **model_kwargs, ) # beam_search does not automatically interleave `batch_size` dim for `num_beams * num_return_sequences` + torch.manual_seed(0) kwargs = {} if model.config.is_encoder_decoder: encoder_outputs, input_ids, attention_mask = self._get_encoder_outputs( @@ -482,7 +483,6 @@ def _beam_sample_generate( logits_processor = LogitsProcessorList() logits_processor.append(InfNanRemoveLogitsProcessor()) - torch.manual_seed(0) with torch.no_grad(): model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_beam_sample =
model.beam_sample(
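Every hunk above applies the same substitution to the LayerDrop skip logic: the per-layer draw now comes from torch.rand([]) — a 0-dim tensor sampled uniformly from [0, 1) by PyTorch's global generator — instead of random.uniform(0, 1) or np.random.uniform(0, 1), so which layers get skipped is governed by torch.manual_seed rather than by Python's or NumPy's RNG state. The sketch below is a minimal, self-contained illustration of that pattern; TinyLayerDropStack and its sizes are invented for the example and are not part of the library.

import torch
from torch import nn

class TinyLayerDropStack(nn.Module):
    def __init__(self, num_layers: int = 4, hidden: int = 8, layerdrop: float = 0.5):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(hidden, hidden) for _ in range(num_layers)])
        self.layerdrop = layerdrop

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        for layer in self.layers:
            # torch.rand([]) returns a 0-dim tensor drawn uniformly from [0, 1);
            # comparing it to a Python float behaves just like the old
            # random.uniform(0, 1) / np.random.uniform(0, 1) calls.
            dropout_probability = torch.rand([])
            if self.training and (dropout_probability < self.layerdrop):
                continue  # skip the layer (LayerDrop)
            hidden_states = layer(hidden_states)
        return hidden_states

torch.manual_seed(0)
model = TinyLayerDropStack().train()
out_a = model(torch.ones(2, 8))

torch.manual_seed(0)
out_b = model(torch.ones(2, 8))
# Because the draw now comes from the torch generator, re-seeding reproduces
# the same set of skipped layers and therefore the same output.
assert torch.allclose(out_a, out_b)

The torch.manual_seed(0) call in tests/generation/test_utils.py is presumably moved ahead of the encoder-outputs helper because, after this change, torch.rand([]) is evaluated on every forward pass even in eval mode, so the generator state has to be pinned before the first forward call for the beam-sample comparison to stay deterministic.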