Commit

0.4.9
RoyToluna committed Jan 22, 2023
1 parent 8be2cc0 commit caa7711
Showing 10 changed files with 65 additions and 72 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.txt
@@ -1,6 +1,13 @@
Change Log
==========

0.4.9 (22/01/2023)
-----------------
* Added assert to Attention class (from extensions) when mask is used
* Fixed confusion matrix cpu/gpu device error
* Better handling on callbacks where apply_on_states=None (apply on all states)
* Updated Pipfile


0.4.8 (15/09/2022)
-----------------
6 changes: 3 additions & 3 deletions Pipfile
@@ -4,8 +4,8 @@ url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
tensorboard = "==2.3.0"
tqdm = "==4.51.0"
tensorboard = "*"
tqdm = "*"

[packages]
numpy = "*"
@@ -14,4 +14,4 @@ torchvision = "*"
protobuf = "==3.20.*"

[requires]
python_version = "3.7.6"
python_version = "3.9.1"
8 changes: 5 additions & 3 deletions README.md
@@ -25,9 +25,11 @@ There are 2 types of ``lpd`` packagaes available
pip install lpd-nodeps
```

<b>[v0.4.8-beta](https://github.com/RoySadaka/lpd/releases) Release - contains the following:</b>
* Added AbsoluteThresholdChecker & RelativeThresholdChecker classes
* ThresholdCheckers can now be used in CallbackMonitor to better define metric tracking
<b>[v0.4.9-beta](https://github.com/RoySadaka/lpd/releases) Release - contains the following:</b>
* Added assert to Attention class (from extensions) when mask is used
* Fixed confusion matrix cpu/gpu device error
* Better handling on callbacks where apply_on_states=None (apply on all states)
* Updated Pipfile


Previously on lpd:
19 changes: 11 additions & 8 deletions examples/multiple_inputs/model.py
@@ -1,4 +1,4 @@
import torch as T
import torch
import torch.nn as nn
import torch.optim as optim

@@ -30,11 +30,11 @@ def __init__(self, config, num_embeddings):
drop_out_proba=config.TRANSFORMER_DROP_OUT_PROBA,
ff_expansion_rate=config.TRANSFORMER_FF_EXPANSION_RATE)

self.external_query_attention = Attention(key_dim=config.EMBEDDINGS_SIZE, use_query_dense=True)
self.external_query_attention = Attention()
self.norm = nn.LayerNorm(normalized_shape=config.EMBEDDINGS_SIZE) # WILL APPLY NORM OVER THE LAST DIMENTION ONLY
self.mat_mul2d = MatMul2D(transpose_b=True)

def forward(self, x1, x2, x3):
def forward(self, x1, x2, x3, index_select_aux):
# x1 : sequence-Input (batch, num_elements)
# x2 : some1-Input (batch, 1)
# x3 : some2-Input (batch, 1)
@@ -43,14 +43,17 @@ def forward(self, x1, x2, x3):
x1_emb_transformed = self.transformer_encoder(x1_emb) # (batch, num_elements, emb_size)

x3_emb = self.embedding_layer(x3) # (batch, emb_size)
x3_emb_unsqueesed = x3_emb.unsqueeze(1) # (batch, 1, emb_size)
x3_emb_unsqueeze = x3_emb.unsqueeze(1) # (batch, 1, emb_size)

x1_with_x3_reduced = self.external_query_attention(q=x3_emb_unsqueesed,
k=x1_emb_transformed,
v=x1_emb_transformed) # (batch, 1, emb_size)
x1_with_x3_reduced = torch.cat([x3_emb_unsqueeze, x1_emb_transformed], dim=1) # (batch, num_elements+1, emb_size)

x1_with_x3_reduced = self.external_query_attention(q=x1_with_x3_reduced,
k=x1_with_x3_reduced,
v=x1_with_x3_reduced) # (batch, num_elements+1, emb_size)

x1_with_x3_reduced = torch.index_select(x1_with_x3_reduced, dim=1, index=index_select_aux) # (batch, 1, emb_size)

x1_with_x3_residual = self.norm(x1_with_x3_reduced + x3_emb_unsqueesed) # (batch, 1, emb_size)
x1_with_x3_residual = self.norm(x1_with_x3_reduced + x3_emb_unsqueeze) # (batch, 1, emb_size)

x2_emb = self.embedding_layer(x2) # (batch, emb_size)

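The reworked forward pass above drops the external-query Attention call (a single learnable query attending over the sequence) in favor of plain self-attention over the query vector concatenated onto the sequence, followed by an index_select that pulls the query position back out; the new index_select_aux input supplies that index. A minimal standalone sketch of this pattern with stock PyTorch ops (shapes and variable names are illustrative, not lpd's MatMul2D/Attention layers):

```python
import torch
import torch.nn.functional as F

batch, num_elements, emb_size = 4, 7, 16
x1_emb_transformed = torch.randn(batch, num_elements, emb_size)  # encoded sequence
x3_emb_unsqueeze = torch.randn(batch, 1, emb_size)               # single query vector

# concatenate the query vector in front of the sequence, then self-attend over all positions
x = torch.cat([x3_emb_unsqueeze, x1_emb_transformed], dim=1)     # (batch, num_elements+1, emb_size)
scores = (x / emb_size ** 0.5) @ x.transpose(-2, -1)             # (batch, num_elements+1, num_elements+1)
attended = F.softmax(scores, dim=-1) @ x                         # (batch, num_elements+1, emb_size)

# pull the aggregated query position back out, mirroring the model's index_select_aux
index_select_aux = torch.LongTensor([0])
reduced = torch.index_select(attended, dim=1, index=index_select_aux)  # (batch, 1, emb_size)
print(reduced.shape)                                             # torch.Size([4, 1, 16])
```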
5 changes: 3 additions & 2 deletions examples/multiple_inputs/train.py
@@ -1,5 +1,5 @@
import random
import torch as T
import torch
import os
from .config import Config
from .model import get_trainer
@@ -12,7 +12,8 @@ def prepare_chunk_to_model_input(config, chunk):
x2 = [c[config.IDX_OF_X2] for c in chunk]
x3 = [c[config.IDX_OF_X3] for c in chunk]
y = [c[config.IDX_OF_LABEL] for c in chunk]
return [T.LongTensor(x1), T.LongTensor(x2), T.LongTensor(x3)], T.Tensor(y)
index_select_aux = torch.LongTensor([0])
return [torch.LongTensor(x1), torch.LongTensor(x2), torch.LongTensor(x3), index_select_aux], torch.Tensor(y)

def get_data_stats(data_generator, verbose=1):
sanity_count = int(1e6)
16 changes: 5 additions & 11 deletions lpd/callbacks/callback_base.py
@@ -80,16 +80,17 @@ def _extract_apply_on_states(self, apply_on_states):
raise ValueError(f'[CallbackBase] - {s} is of type {type(s)}, expected type {State}')
return result
elif apply_on_states is None:
result.add(apply_on_states)
for state in State:
result.add(state)
return result

raise ValueError(f'[CallbackBase] - got bad value for apply_on_states')

def _validations(self):
if self.apply_on_phase is None:
raise ValueError('[CallbackBase] - No callback phase was provided')
if None in self.apply_on_states:
print('[CallbackBase][!] - apply_on_states is None, callback will be applied to all states')
if self.apply_on_states is None:
print('[CallbackBase] - apply_on_states is None, callback will be applied to all states')

valid_pairs = {
Phase.TRAIN_BEGIN:{None, State.EXTERNAL},
@@ -130,11 +131,4 @@ def should_apply_on_phase(self, callback_context: CallbackContext):
raise ValueError('[CallbackBase] - got bad value for apply_on_phase')

def should_apply_on_state(self, callback_context: CallbackContext):
if None in self.apply_on_states:
return True

for state in self.apply_on_states:
if callback_context.trainer_state == state:
return True

return False
return callback_context.trainer_state in self.apply_on_states
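Under the new behavior shown above, apply_on_states=None is expanded into the full set of states up front, so should_apply_on_state reduces to a plain membership test. A rough standalone sketch of that logic, assuming a simplified State enum (the member names below are illustrative, not necessarily lpd's exact enum):

```python
from enum import Enum

class State(Enum):
    # illustrative members only; lpd defines its own State enum
    EXTERNAL = 0
    TRAIN = 1
    VAL = 2

def extract_apply_on_states(apply_on_states):
    # None now expands to "every state" instead of being stored as-is
    if apply_on_states is None:
        return set(State)
    return set(apply_on_states)

def should_apply_on_state(apply_on_states, trainer_state):
    # with the expansion above, a simple membership test is enough
    return trainer_state in extract_apply_on_states(apply_on_states)

assert should_apply_on_state(None, State.TRAIN)             # None -> applies to all states
assert not should_apply_on_state({State.VAL}, State.TRAIN)  # explicit sets still filter
```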
57 changes: 23 additions & 34 deletions lpd/extensions/custom_layers.py
@@ -41,63 +41,53 @@ def forward(self, inputs):
class Attention(nn.Module):
"""
The architecture is based on the paper “Attention Is All You Need”
Usage (1)
It can be used as Attention in transformer if q,k,v share the same dimensions.
Usage (2)
It can also be used as a method to aggregate a group of vectors into 1 vector if q dimensions are (batch, 1, key_dim)
that way, instead of using Sum, or Average, you can have a learnable query vector (or a few of them) that will learn the aggregation function.
See example in lpd.examples.multiple_inputs.model, where we define external_query_attention like so:
external_query_attention = Attention(key_dim=config.EMBEDDINGS_SIZE, use_query_dense=True)
Used as the Attention layer in transformer.
Args:
key_dim - as defined in the paper, the number of expected features in the encoder inputs
use_query_dense - whether to pass q input into another Dense layer, mostly used in Usage (2), to
run q into a transformation that will transform it into the vector space of k and v
name - optional, any string to describe this layer
"""
def __init__(self, key_dim, use_query_dense=False, name=None):
def __init__(self, name=None):
super(Attention, self).__init__()
#PARAMS
self.key_dim = key_dim
self.sqrt_key_dim = key_dim ** 0.5
self.use_query_dense = use_query_dense
self.name = name if name else 'attention'
#LAYERS
self.mat_mul2d = MatMul2D(transpose_b=False, name = f'{self.name}__MatMul2D')
self.mat_mul2d_t = MatMul2D(transpose_b=True, name = f'{self.name}__MatMul2DT')
self.softmax_last_dim = nn.Softmax(dim=-1)
if self.use_query_dense:
# SOMETIMES WE WANT TO GO THROUGH ANOTHER TRANSFORMATION BEFORE RUNNING THE QUERY,
# FOR EXAMPLE, WHEN THIS IS USED AS A STANDALONE LAYER
self.query_dense = Dense(in_dim=self.key_dim, out_dim=self.key_dim, use_bias=False, activation=None, name = f'{self.name}__Dense')

def forward(self, q,k,v, mask = None):
# q: (batch, ?, key_dim) where "?" can be 1 or seq_len
# k: (batch, seq_len, key_dim)
# v: (batch, seq_len, key_dim)
# mask: (batch, 1, seq_len)
# q: (batch, seq_len, emb_dim)
# k: (batch, seq_len, emb_dim)
# v: (batch, seq_len, emb_dim)
# mask: (batch, seq_len)

# APPLY ATTENTION:
# ( Q * Kt )
# softmax ( ---------- ) * V
# ( sqrt(dk) )

if self.use_query_dense:
q = self.query_dense(q) # (batch, seq_len, key_dim)
if mask is not None:
assert q.shape == k.shape == v.shape, 'Dimensions mismatch, When using mask it is expected that the shape of q,k,v will be identical'

q_k = self.mat_mul2d_t(q, k) # (batch, ?, seq_len)
scores = q_k / self.sqrt_key_dim # (batch, ?, seq_len)
emb_dim = q.shape[-1]
q = q / (emb_dim ** 0.5) # (batch, seq_len, emb_dim)
q_k = self.mat_mul2d_t(q, k) # (batch, seq_len, seq_len)

if mask is not None:
mask_ready = torch.log(mask) # (batch, 1, seq_len)
scores = scores + mask_ready # (batch, ?, seq_len) (+= is doing broadcasting)
# PREPARE MASK FOR SOFTMAX ON COLUMNS, WILL ZERO OUT MASKED COLUMNS
mask_ready = torch.log(mask).unsqueeze(-2) # (batch, 1, seq_len)
q_k = q_k + mask_ready # (batch, seq_len, seq_len) (broadcasting op)

attention_weights = self.softmax_last_dim(q_k) # (batch, seq_len, seq_len)

attention_weights = self.softmax_last_dim(scores) # (batch, ?, seq_len)

attention_output = self.mat_mul2d(attention_weights, v) # (batch, ?, key_dim)
attention_output = self.mat_mul2d(attention_weights, v) # (batch, seq_len, emb_dim)

return attention_output # (batch, ?, key_dim)
if mask is not None:
# A CLEAN UP THAT WILL RESTORE MASKED ROWS TO THEIR ORIGINAL VALUES
attention_output = (attention_output * mask.unsqueeze(-1)) + (q * (1-mask).unsqueeze(-1)) # (batch, seq_len, emb_dim)

return attention_output # (batch, seq_len, emb_dim)

class AttentionHead(nn.Module):
def __init__(self, in_dim, key_dim, name=None):
@@ -112,7 +102,7 @@ def __init__(self, in_dim, key_dim, name=None):
self.query_dense = Dense(self.in_dim, self.key_dim, use_bias=True, activation=None, name = f'{self.name}__Q-Dense')
self.key_dense = Dense(self.in_dim, self.key_dim, use_bias=True, activation=None, name = f'{self.name}__K-Dense')
self.value_dense = Dense(self.in_dim, self.key_dim, use_bias=True, activation=None, name = f'{self.name}__V-Dense')
self.att = Attention(self.key_dim, name = f'{self.name}__Attention')
self.att = Attention(name = f'{self.name}__Attention')

def forward(self, inputs, mask = None): # inputs:(batch, seq_len, emb_size), mask:(batch, seq_len)
q = self.query_dense(inputs) # (batch, seq_len, key_dim)
@@ -282,4 +272,3 @@ def forward(self, inputs, mask=None):
for encoder_layer in self.transformer_blocks:
outputs = encoder_layer(inputs=outputs, mask=mask)
return outputs # (batch, seq_len, out_dim) <-- USUALLY out_dim = emb_size

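The block above is the standard scaled dot-product attention; the updated layer infers the scale from the last dimension of q, asserts that q, k and v share a shape whenever a mask is passed, adds log(mask) to the score matrix so masked columns vanish after the softmax, and finally restores masked rows to their original query values. A self-contained sketch of that math using plain tensor ops (a sketch of the behavior, not the lpd layer itself; shapes are illustrative):

```python
import torch
import torch.nn.functional as F

def masked_attention(q, k, v, mask=None):
    # q, k, v: (batch, seq_len, emb_dim); mask: (batch, seq_len) with 1.0 = keep, 0.0 = masked
    if mask is not None:
        assert q.shape == k.shape == v.shape, 'when a mask is used, q, k, v are expected to share a shape'
    emb_dim = q.shape[-1]
    scores = (q / emb_dim ** 0.5) @ k.transpose(-2, -1)    # (batch, seq_len, seq_len)
    if mask is not None:
        # log(1) = 0 keeps unmasked columns; log(0) = -inf removes masked columns after softmax
        scores = scores + torch.log(mask).unsqueeze(-2)    # broadcast over rows
    weights = F.softmax(scores, dim=-1)                    # (batch, seq_len, seq_len)
    out = weights @ v                                      # (batch, seq_len, emb_dim)
    if mask is not None:
        # restore masked rows to their original query values, as the updated layer does
        out = out * mask.unsqueeze(-1) + q * (1 - mask).unsqueeze(-1)
    return out

q = k = v = torch.randn(2, 5, 8)
mask = torch.tensor([[1., 1., 1., 0., 0.], [1., 1., 0., 0., 0.]])
print(masked_attention(q, k, v, mask).shape)               # torch.Size([2, 5, 8])
```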
3 changes: 2 additions & 1 deletion lpd/metrics/confusion_matrix.py
@@ -148,4 +148,5 @@ def update_state(self, y_pred: T.Tensor, y_true: T.Tensor):
y_true_class_idxs = y_true.long()

for row, col in zip(y_pred_class_idxs, y_true_class_idxs):
self.confusion[row][col] += 1
self.confusion[row.cpu()][col.cpu()] += 1

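The confusion-matrix fix moves the per-sample class-index tensors to the CPU before they index the CPU-resident confusion table, which avoids the device mismatch when predictions and labels arrive on a GPU. A tiny sketch of the pattern (the surrounding names are illustrative):

```python
import torch

confusion = torch.zeros(3, 3, dtype=torch.long)  # lives on the CPU

def update_state(y_pred_class_idxs, y_true_class_idxs):
    for row, col in zip(y_pred_class_idxs, y_true_class_idxs):
        # .cpu() is a no-op for CPU tensors and moves CUDA tensors over,
        # so the indices always match the device of the confusion table
        confusion[row.cpu()][col.cpu()] += 1

update_state(torch.tensor([0, 2, 1]), torch.tensor([0, 1, 1]))
print(confusion)
```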
8 changes: 3 additions & 5 deletions setup-nodeps.py
@@ -27,16 +27,14 @@
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Utilities'
]

setup(
name='lpd-nodeps',
version='0.4.8',
version='0.4.9',
description='A Fast, Flexible Trainer with Callbacks and Extensions for PyTorch',
long_description_content_type='text/markdown',
long_description=README_md,
@@ -47,7 +45,7 @@
maintainer_email='torch.lpd@gmail.com',
packages=find_packages(exclude=['tests', 'tests/*', 'examples', 'examples/*']),
install_requires=install_requires_nodeps,
python_requires='>=3.6',
python_requires='>=3.9',
classifiers=classifiers,
keywords=['lpd-nodeps']
)
8 changes: 3 additions & 5 deletions setup.py
@@ -29,16 +29,14 @@
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Utilities'
]

setup(
name='lpd',
version='0.4.8',
version='0.4.9',
description='A Fast, Flexible Trainer with Callbacks and Extensions for PyTorch',
long_description_content_type='text/markdown',
long_description=README_md,
@@ -49,7 +47,7 @@
maintainer_email='torch.lpd@gmail.com',
packages=find_packages(exclude=['tests', 'tests/*', 'examples', 'examples/*']),
install_requires=install_requires,
python_requires='>=3.6',
python_requires='>=3.9',
classifiers=classifiers,
keywords=['pytorch,trainer,callback,callbacks,earlystopping,tensorboard,modelcheckpoint,checkpoint,layers,dense,metrics,predictor,binary accuracy,extensions,track,monitor,machine,deep learning,neural,networks,AI,keras decay,confusion matrix']
)
