From 2a84d68d02bc1e84eeec6ce3a0e8febc9e3d7e35 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 16 Jan 2025 16:48:43 -0800 Subject: [PATCH] Add some so150m vit w/ sbb recipe weights, and a ese_vovnet57b model with RA4 recipe --- timm/models/vision_transformer.py | 20 ++++++++++++++++++-- timm/models/vovnet.py | 7 ++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py index e29c2231a0..2672e1adbd 100644 --- a/timm/models/vision_transformer.py +++ b/timm/models/vision_transformer.py @@ -2152,8 +2152,13 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]: 'vit_base_patch16_reg4_gap_256.untrained': _cfg( input_size=(3, 256, 256)), - 'vit_so150m_patch16_reg4_gap_256.untrained': _cfg( - input_size=(3, 256, 256)), + 'vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1k': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), crop_pct=1.0), + 'vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k': _cfg( + hf_hub_id='timm/', + num_classes=11821, + input_size=(3, 256, 256), crop_pct=0.95), 'vit_so150m_patch16_reg4_map_256.untrained': _cfg( input_size=(3, 256, 256)), @@ -3482,6 +3487,17 @@ def vit_so150m_patch16_reg4_gap_256(pretrained: bool = False, **kwargs) -> Visio return model +@register_model +def vit_so150m_patch16_reg4_gap_384(pretrained: bool = False, **kwargs) -> VisionTransformer: + model_args = dict( + patch_size=16, embed_dim=896, depth=18, num_heads=14, mlp_ratio=2.572, + class_token=False, reg_tokens=4, global_pool='avg', fc_norm=False, + ) + model = _create_vision_transformer( + 'vit_so150m_patch16_reg4_gap_384', pretrained=pretrained, **dict(model_args, **kwargs)) + return model + + @register_model def vit_intern300m_patch14_448(pretrained: bool = False, **kwargs) -> VisionTransformer: model_args = dict( diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py index 86851666a2..08e6d0b6c3 100644 --- a/timm/models/vovnet.py +++ b/timm/models/vovnet.py @@ -419,7 +419,12 @@ def _cfg(url='', **kwargs): 'ese_vovnet39b.ra_in1k': _cfg( hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=0.95), - 'ese_vovnet57b.untrained': _cfg(url=''), + 'ese_vovnet57b.ra4_e3600_r256_in1k': _cfg( + hf_hub_id='timm/', + mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), + crop_pct=0.95, input_size=(3, 256, 256), pool_size=(8, 8), + test_input_size=(3, 320, 320), test_crop_pct=1.0 + ), 'ese_vovnet99b.untrained': _cfg(url=''), 'eca_vovnet39b.untrained': _cfg(url=''), 'ese_vovnet39b_evos.untrained': _cfg(url=''),