diff --git a/DECIMER/Predictor_usingCheckpoints.py b/DECIMER/Predictor_usingCheckpoints.py
index 5d4acd4..c612856 100644
--- a/DECIMER/Predictor_usingCheckpoints.py
+++ b/DECIMER/Predictor_usingCheckpoints.py
@@ -1,10 +1,9 @@
 import os
 import sys
 import tensorflow as tf
-
+from typing import List, Tuple
 import pickle
 import pystow
-from selfies import decoder
 import Transformer_decoder
 
 if int(tf.__version__.split(".")[1]) <= 10:
@@ -116,6 +115,9 @@ def main():
     else:
         SMILES = predict_SMILES(sys.argv[1])
         print(SMILES)
+        SMILES_with_confidence = predict_SMILES_with_confidence(sys.argv[1])
+        for tup in SMILES_with_confidence:
+            print(tup)
 
 
 class DECIMER_Predictor(tf.Module):
@@ -126,6 +128,16 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
         self.max_length = max_length
 
     def __call__(self, Decoded_image):
+        """
+        Run the DECIMER predictor model when called.
+        Using predict_SMILES or predict_SMILES_with_confidence is recommended instead.
+
+        Args:
+            Decoded_image (_type_): output of config.decode_image
+
+        Returns:
+            Tuple[tf.Tensor, tf.Tensor]: predicted tokens, confidence values
+        """
         assert isinstance(Decoded_image, tf.Tensor)
 
         _image_batch = tf.expand_dims(Decoded_image, 0)
@@ -140,6 +152,7 @@ def __call__(self, Decoded_image):
 
         output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
         output_array = output_array.write(0, start_token)
+        confidence_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
 
         for t in tf.range(max_length):
            output = tf.transpose(output_array.stack())
@@ -154,31 +167,87 @@ def __call__(self, Decoded_image):
             predictions = prediction_batch[:, -1:, :]  # (batch_size, 1, vocab_size)
             predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
-
+            confidence = predictions[-1][-1][int(predicted_id)]
             output_array = output_array.write(t + 1, predicted_id[0])
-
+            confidence_array = confidence_array.write(t + 1, confidence)
             if predicted_id == end_token:
                 break
 
         output = tf.transpose(output_array.stack())
-        return output
+        return output, confidence_array.stack()
 
 
-def detokenize_output(predicted_array):
+def detokenize_output(
+    predicted_array: tf.Tensor
+) -> str:
+    """
+    This function takes the predicted array of tokens and returns the predicted SMILES
+    string.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+
+    Returns:
+        str: SMILES string
+    """
     outputs = [tokenizer.index_word[i] for i in predicted_array[0].numpy()]
     prediction = (
         "".join([str(elem) for elem in outputs])
         .replace("<start>", "")
         .replace("<end>", "")
     )
-
     return prediction
 
 
+def detokenize_output_add_confidence(
+    predicted_array: tf.Tensor,
+    confidence_array: tf.Tensor,
+) -> List[Tuple[str, float]]:
+    """
+    This function takes the predicted array of tokens as well as the confidence values
+    returned by the Transformer Decoder and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    value.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+        confidence_array (tf.Tensor): Transformer Decoder output array (confidence values)
+
+    Returns:
+        List[Tuple[str, float]]: tokens of the predicted SMILES with confidence values
+    """
+    prediction_with_confidence = [(tokenizer.index_word[predicted_array[0].numpy()[i]],
+                                   confidence_array[i].numpy())
+                                  for i in range(len(confidence_array))]
+    decoded_prediction_with_confidence = list([(utils.decoder(tok), conf) for tok, conf
+                                               in prediction_with_confidence[1:-1]])
+    decoded_prediction_with_confidence.append(prediction_with_confidence[-1])
+    return decoded_prediction_with_confidence
+
 
 # Initiate the DECIMER class
 DECIMER = DECIMER_Predictor(encoder, tokenizer, transformer, MAX_LEN)
 
 
-def predict_SMILES(image_path: str):
+def predict_SMILES_with_confidence(image_path: str) -> List[Tuple[str, float]]:
+    """
+    This function takes an image path (str) and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    level from the last layer of the Transformer decoder.
+
+    Args:
+        image_path (str): Path of chemical structure depiction image
+
+    Returns:
+        (List[Tuple[str, float]]): Tuples that contain the tokens and the confidence
+        values of the predicted SMILES
+    """
+    decodedImage = config.decode_image(image_path)
+    predicted_tokens, confidence_values = DECIMER(tf.constant(decodedImage))
+    predicted_SMILES_with_confidence = detokenize_output_add_confidence(
+        predicted_tokens,
+        confidence_values)
+    return predicted_SMILES_with_confidence
+
+
+def predict_SMILES(image_path: str) -> str:
     """
     This function takes an image path (str) and returns the SMILES
     representation of the depicted molecule (str).
@@ -190,9 +259,8 @@ def predict_SMILES(image_path: str):
         (str): SMILES representation of the molecule in the input image
     """
     decodedImage = config.decode_image(image_path)
-    predicted_tokens = DECIMER(tf.constant(decodedImage))
+    predicted_tokens, _ = DECIMER(tf.constant(decodedImage))
     predicted_SMILES = utils.decoder(detokenize_output(predicted_tokens))
-
     return predicted_SMILES
 
 
diff --git a/DECIMER/Repack_model.py b/DECIMER/Repack_model.py
index f6554c0..aa61f00 100644
--- a/DECIMER/Repack_model.py
+++ b/DECIMER/Repack_model.py
@@ -1,11 +1,10 @@
 import os
 import tensorflow as tf
-
+from typing import List, Tuple
 import pickle
 import DECIMER.Efficient_Net_encoder as Efficient_Net_encoder
 import DECIMER.Transformer_decoder as Transformer_decoder
 import DECIMER.config as config
-import DECIMER.utils as utils
 
 print(tf.__version__)
 
@@ -84,16 +83,51 @@
 start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1])
 
 
-def detokenize_output(predicted_array):
+def detokenize_output(
+    predicted_array: tf.Tensor
+) -> str:
+    """
+    This function takes the predicted array of tokens and returns the predicted SMILES
+    string.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+
+    Returns:
+        str: SMILES string
+    """
     outputs = [tokenizer.index_word[i] for i in predicted_array[0].numpy()]
     prediction = (
         "".join([str(elem) for elem in outputs])
         .replace("<start>", "")
         .replace("<end>", "")
     )
-
     return prediction
 
 
+def detokenize_output_add_confidence(
+    predicted_array: tf.Tensor,
+    confidence_array: tf.Tensor,
+) -> List[Tuple[str, float]]:
+    """
+    This function takes the predicted array of tokens as well as the confidence values
+    returned by the Transformer Decoder and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    value.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+        confidence_array (tf.Tensor): Transformer Decoder output array (confidence values)
+
+    Returns:
+        List[Tuple[str, float]]: tokens of the predicted SMILES with confidence values
+    """
+    prediction_with_confidence = [(tokenizer.index_word[predicted_array[0].numpy()[i]],
+                                   confidence_array[i].numpy())
+                                  for i in range(len(confidence_array))]
+    decoded_prediction_with_confidence = list([(utils.decoder(tok), conf) for tok, conf
+                                               in prediction_with_confidence[1:-1]])
+    decoded_prediction_with_confidence.append(prediction_with_confidence[-1])
+    return decoded_prediction_with_confidence
+
 
 class DECIMER_Predictor(tf.Module):
     """This is a class which takes care of inference.
 
     It loads the saved checkpoint and the necessary
@@ -128,8 +162,6 @@ def __call__(self, Decoded_image):
             output (tf.Tensor[tf.int64]): predicted output as an array.
         """
         assert isinstance(Decoded_image, tf.Tensor)
-        if len(Decoded_image.shape) == 0:
-            sentence = Decoded_image[tf.newaxis]
 
         _image_batch = tf.expand_dims(Decoded_image, 0)
         _image_embedding = encoder(_image_batch, training=False)
@@ -143,6 +175,7 @@
 
         output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
         output_array = output_array.write(0, start_token)
+        confidence_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
 
         for t in tf.range(max_length):
             output = tf.transpose(output_array.stack())
@@ -155,16 +188,16 @@
             # select the last word from the seq_len dimension
             predictions = prediction_batch[:, -1:, :]  # (batch_size, 1, vocab_size)
-
+
             predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
-
+            confidence = predictions[-1][-1][int(predicted_id)]
             output_array = output_array.write(t + 1, predicted_id[0])
-
+            confidence_array = confidence_array.write(t + 1, confidence)
             if predicted_id == end_token:
                 break
 
         output = tf.transpose(output_array.stack())
-        return output
+        return output, confidence_array.stack()
 
 
 DECIMER = DECIMER_Predictor(encoder, tokenizer, transformer, MAX_LEN)
diff --git a/DECIMER/Transformer_decoder.py b/DECIMER/Transformer_decoder.py
index 7e91e29..9ed77da 100644
--- a/DECIMER/Transformer_decoder.py
+++ b/DECIMER/Transformer_decoder.py
@@ -185,7 +185,7 @@ def __init__(
             for _ in range(num_layers)
         ]
         self.dropout = tf.keras.layers.Dropout(rate)
-        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+        self.final_layer = tf.keras.layers.Dense(target_vocab_size, activation="softmax")
 
     def call(self, x, enc_output, training, look_ahead_mask=None, padding_mask=None):
         seq_len = tf.shape(x)[1]
diff --git a/DECIMER/decimer.py b/DECIMER/decimer.py
index 28ad310..f88bc02 100644
--- a/DECIMER/decimer.py
+++ b/DECIMER/decimer.py
@@ -5,13 +5,14 @@
 import pystow
 import shutil
 import tensorflow as tf
+from typing import List, Tuple
 
 import DECIMER.config as config
 import DECIMER.utils as utils
 
 # Silence tensorflow model loading warnings.
 logging.getLogger("absl").setLevel("ERROR")
-# Silence tensorflow errors. optional not recommened if your model is not working properly.
+# Silence tensorflow errors - not recommended if your model is not working properly.
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
 # Set the absolute path
@@ -87,10 +88,34 @@ def detokenize_output(predicted_array: int) -> str:
         .replace("<start>", "")
         .replace("<end>", "")
     )
-
     return prediction
 
 
+def detokenize_output_add_confidence(
+    predicted_array: tf.Tensor,
+    confidence_array: tf.Tensor,
+) -> List[Tuple[str, float]]:
+    """
+    This function takes the predicted array of tokens as well as the confidence values
+    returned by the Transformer Decoder and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    value.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+        confidence_array (tf.Tensor): Transformer Decoder output array (confidence values)
+
+    Returns:
+        List[Tuple[str, float]]: tokens of the predicted SMILES with confidence values
+    """
+    prediction_with_confidence = [(tokenizer.index_word[predicted_array[0].numpy()[i]],
+                                   confidence_array[i].numpy())
+                                  for i in range(len(confidence_array))]
+    decoded_prediction_with_confidence = list([(utils.decoder(tok), conf) for tok, conf
+                                               in prediction_with_confidence[1:-1]])
+    decoded_prediction_with_confidence.append(prediction_with_confidence[-1])
+    return decoded_prediction_with_confidence
+
+
 # Load DECIMER model_packed
 DECIMER_V2 = tf.saved_model.load(default_path.as_posix() + "/DECIMER_model/")
 
@@ -107,11 +132,31 @@ def predict_SMILES(image_path: str) -> str:
         (str): SMILES representation of the molecule in the input image
     """
    chemical_structure = config.decode_image(image_path)
-    predicted_tokens = DECIMER_V2(chemical_structure)
+    predicted_tokens, _ = DECIMER_V2(chemical_structure)
     predicted_SMILES = utils.decoder(detokenize_output(predicted_tokens))
-
     return predicted_SMILES
 
 
+def predict_SMILES_with_confidence(image_path: str) -> List[Tuple[str, float]]:
+    """
+    This function takes an image path (str) and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    level from the last layer of the Transformer decoder.
+
+    Args:
+        image_path (str): Path of chemical structure depiction image
+
+    Returns:
+        (List[Tuple[str, float]]): Tuples that contain the tokens and the confidence
+        values of the predicted SMILES
+    """
+    decodedImage = config.decode_image(image_path)
+    predicted_tokens, confidence_values = DECIMER_V2(tf.constant(decodedImage))
+    predicted_SMILES_with_confidence = detokenize_output_add_confidence(
+        predicted_tokens,
+        confidence_values)
+    return predicted_SMILES_with_confidence
+
+
 if __name__ == "__main__":
     main()