Unable to use with real data #12

Open · Bashima opened this issue May 31, 2019 · 1 comment

Comments

Bashima commented May 31, 2019

I am trying to use this attention code (monotonic) for Spanish-to-English translation (http://download.tensorflow.org/data/spa-eng.zip). I am getting this error:

Input 0 is incompatible with layer AttentionDecoder: expected ndim=3, found ndim=2

I would really appreciate it if you could help me out. I am quite new to RNNs and Keras.

The following is the code I am using.

```python
# Imports added for completeness; the AttentionDecoder import path below is an
# assumption (it is the layer provided by this repository).
import os
import re
import unicodedata

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from attention_decoder import AttentionDecoder

path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', 
    extract=True)

path_to_file = os.path.dirname(path_to_zip)+"/spa-eng/spa.txt"

def unicode_to_ascii(s):
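    """Strip accents by NFD-normalizing and dropping combining marks."""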
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
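    """Lowercase, strip accents, space out punctuation, drop other symbols, and wrap with <start>/<end> tokens."""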
    w = unicode_to_ascii(w.lower().strip())
    
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)

    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    
    w = w.rstrip().strip()
    
    w = '<start> ' + w + ' <end>'
    return w

def create_dataset(path, num_examples):
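    """Read the tab-separated English/Spanish lines and preprocess both sentences in each pair."""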
    lines = open(path, encoding='UTF-8').read().strip().split('\n')
    
    word_pairs = [[preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]
    
    return word_pairs

class LanguageIndex():
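  """Builds word->index and index->word lookups for a language; index 0 is reserved for <pad>."""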
  def __init__(self, lang):
    self.lang = lang
    self.word2idx = {}
    self.idx2word = {}
    self.vocab = set()
    
    self.create_index()
    
  def create_index(self):
    for phrase in self.lang:
      self.vocab.update(phrase.split(' '))
    
    self.vocab = sorted(self.vocab)
    
    self.word2idx['<pad>'] = 0
    for index, word in enumerate(self.vocab):
      self.word2idx[word] = index + 1
    
    for word, index in self.word2idx.items():
      self.idx2word[index] = word
    
def max_length(tensor):
    return max(len(t) for t in tensor)


def load_dataset(path, num_examples):
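    """Convert both languages to integer tensors and pad them to their maximum lengths."""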
    pairs = create_dataset(path, num_examples)
    inp_lang = LanguageIndex(sp for en, sp in pairs)
    targ_lang = LanguageIndex(en for en, sp in pairs)
    input_tensor = [[inp_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]
    target_tensor = [[targ_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]

    max_length_inp, max_length_tar = max_length(input_tensor), max_length(target_tensor)
    input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor, 
                                                                 maxlen=max_length_inp,
                                                                 padding='post')
    
    target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor, 
                                                                  maxlen=max_length_tar, 
                                                                  padding='post')
    
    return input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_tar

num_examples = 30000
input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_targ = load_dataset(path_to_file, num_examples)

input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val)

BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
embedding_dim = 256
units = 1024 # unit is the dimension of the output
vocab_inp_size = len(inp_lang.word2idx)
vocab_tar_size = len(targ_lang.word2idx)

print(vocab_inp_size, vocab_tar_size)

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(BATCH_SIZE))


def gru(units):
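    """Return a CuDNN-backed GRU when a GPU is available, otherwise a standard GRU; both return sequences and the final state."""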
    if tf.test.is_gpu_available():
        print("GPU")
        return tf.keras.layers.CuDNNGRU(units, 
                                        return_sequences=True, 
                                        return_state=True,
                                        recurrent_initializer='glorot_uniform')
    else:
        print("NO GPU")
        return tf.keras.layers.GRU(units, 
                               return_sequences=True, 
                               return_state=True, 
                               recurrent_activation='sigmoid', 
                               recurrent_initializer='glorot_uniform')

class Encoder(tf.keras.Model):
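    """Embedding + GRU encoder; call() returns the full output sequence and the final hidden state."""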
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = gru(self.enc_units)
        
    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)        
        return output, state
    
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

EPOCHS = 10
# Build the decoder model around the AttentionDecoder layer and compile it.
inputs = Input(shape=(None,), dtype='int64')
outp_true = Input(shape=(None,), dtype='int64')
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
attention_decoder = AttentionDecoder(units=units, alphabet_size=vocab_tar_size,
                                     embedding_dim=embedding_dim,
                                     is_monotonic=True,
                                     normalize_energy=True)

output = attention_decoder([inputs, outp_true])

model = Model(inputs=[inputs, outp_true], outputs=[output])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'])
model.summary()

dec_input = tf.expand_dims([targ_lang.word2idx['<start>']] * BATCH_SIZE, 1)  
hidden = encoder.initialize_hidden_state()
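# Training loop: encode each batch, then fit the decoder model on the encoder's final hidden state.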
for epoch in range(EPOCHS):
    for (batch, (inp, targ)) in enumerate(dataset):
        print(batch)
        enc_output, enc_hidden = encoder(inp, hidden)
        print(enc_hidden.shape)
        model.fit([enc_hidden, targ], targ,
                  epochs=1, batch_size=1,
                  validation_data=([input_tensor_val, np.squeeze(target_tensor_val, axis=-1)], target_tensor_val))
```
@Victor-Almeida

If you need 3 dimensions instead of 2, you can add a new axis with a single position. For example, if your array's shape is (m, n), you reshape it to (m, n, 1).
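
A minimal sketch of that reshape with NumPy/TensorFlow (the array name and shapes are placeholders; where the new axis goes depends on whether the layer expects `(batch, timesteps, features)`):

```python
import numpy as np
import tensorflow as tf

# Placeholder 2-D array, e.g. something shaped like an encoder state (m, n).
state = np.zeros((64, 1024), dtype=np.float32)

# Add a trailing axis of size 1 -> shape (64, 1024, 1), as suggested above.
state_3d = np.expand_dims(state, axis=-1)
state_3d = state.reshape(64, 1024, 1)        # equivalent

# If the layer expects (batch, timesteps, features), insert the axis in the middle instead.
state_3d = state[:, np.newaxis, :]           # shape (64, 1, 1024)

# The same operation on a TensorFlow tensor.
state_3d_tf = tf.expand_dims(tf.constant(state), axis=1)

print(state_3d.shape, state_3d_tf.shape)
```

If the tensor in question is a Keras symbolic tensor inside the model, a `Reshape` or `Lambda(lambda t: tf.expand_dims(t, axis=1))` layer does the same thing within the graph.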
