# Recurrent layer used by the Encoder.
lstm_layer = tf.keras.layers.LSTM(
    units=units,  # width of the hidden state, i.e. size of each output vector
    return_state=True,  # also hand back the final hidden and cell states
    return_sequences=True,  # emit an output for every time step, not just the last
)

# Build the decoder's input/expected-output pair from one target batch by
# shifting it one position along axis 1.
# NOTE(review): assumes targ is (batch, time) with start/end tokens — confirm.
dec_input = targ[ : , :-1 ]   # Decoder input: every target token except the last
real = targ[ : , 1: ]         # Expected output: every target token except the first
pred = decoder(dec_input, decoder_initial_state)
# NOTE(review): pred is presumably a tfa BasicDecoderOutput; verify against Decoder.
logits = pred.rnn_output
loss = loss_function(real, logits)

# Luong attention scoring the decoder state against `memory`.
# NOTE(review): `memory` is presumably the encoder output sequence and
# `memory_sequence_length` its unpadded lengths — confirm at the call site.
attention_mechanism = tfa.seq2seq.LuongAttention(
    dec_units, memory, memory_sequence_length
)
# AttentionWrapper wraps a cell *instance*, so the LSTMCell has to be
# constructed here; it is sized with dec_units to match the attention
# mechanism and the attention layer.
rnn_cell = tfa.seq2seq.AttentionWrapper(
    tf.keras.layers.LSTMCell(dec_units),
    attention_mechanism,
    attention_layer_size=dec_units,
)

# Sampler used while training; `fc` projects each decoder step's output.
# NOTE(review): `fc` is presumably a Dense layer over the target vocabulary — confirm.
sampler = tfa.seq2seq.sampler.TrainingSampler()
decoder = tfa.seq2seq.BasicDecoder(rnn_cell, sampler=sampler, output_layer=fc)


import tensorflow as tf
import tensorflow_addons as tfa
import time

from NMTDataset import NMTDataset
from models import Encoder, Decoder
from functions import *

def get_nmt():
    """Return the relative path of the dataset text file.

    Nothing is downloaded here: if the file is not present, fetch it by hand
    and change the path returned below to point at it.
    """
    return "./dict/vie-eng/vie.txt"


# Configuration parameters
# DataSet
BUFFER_SIZE = 256000  # forwarded to NMTDataset.call; presumably the shuffle buffer size — TODO confirm
BATCH_SIZE = 64  # examples per batch; also given to Encoder and Decoder
num_examples = 10000 # Let's limit the #training examples for faster training
# Neural Network parameters
embedding_dim = 256  # embedding size handed to both Encoder and Decoder
units = 1024  # recurrent layer width handed to both Encoder and Decoder
# Floor division: a trailing partial batch is not counted as a step.
# NOTE(review): computed from num_examples, not from the size of the training
# split — confirm this matches what NMTDataset.call actually yields.
steps_per_epoch = num_examples//BATCH_SIZE


# Load DataSet: build the train/validation datasets and the two language
# tokenizers, then derive vocabulary sizes and sequence lengths from them.
dataset_creator = NMTDataset("en-vie", get_nmt())
train_dataset, val_dataset, inp_lang, targ_lang = dataset_creator.call(
    num_examples, BUFFER_SIZE, BATCH_SIZE
)
# Pull a single batch to read the sequence lengths from its shape.
example_input_batch, example_target_batch = next(iter(train_dataset))
# NOTE(review): the +1 presumably reserves id 0 for padding (Keras Tokenizer
# word_index starts at 1) — confirm inp_lang/targ_lang are Keras tokenizers.
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1
# Axis 1 of each batch is taken as the sequence length.
max_length_input = example_input_batch.shape[1]
max_length_output = example_target_batch.shape[1]


# Test Encoder Stack: build the encoder and run one example batch through it.
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
# The encoder returns three values, unpacked here as the output sequence plus
# two state tensors (named h and c; presumably LSTM hidden/cell — TODO confirm).
sample_output, sample_h, sample_c = encoder(example_input_batch, sample_hidden)


# Test decoder stack: build the decoder with Luong attention and run one
# dummy batch through it, seeded with the encoder's sample outputs.
decoder = Decoder(
    vocab_tar_size,
    embedding_dim,
    units,
    BATCH_SIZE,
    max_length_input,
    max_length_output,
    "luong",
)
# Dummy decoder input. With no dtype/maxval given, tf.random.uniform yields
# float32 values in [0, 1), so this checks shapes only, not real token ids.
sample_x = tf.random.uniform((BATCH_SIZE, max_length_output))
# Point the attention mechanism at the encoder outputs before decoding.
decoder.attention_mechanism.setup_memory(sample_output)
initial_state = decoder.build_initial_state(
    BATCH_SIZE, [sample_h, sample_c], tf.float32
)

sample_decoder_outputs = decoder(sample_x, initial_state)


EPOCHS = 50

# Train for a fixed number of epochs; after each one, print the mean batch
# loss and the wall-clock time it took.
for epoch in range(EPOCHS):
    start = time.time()

    # One initial encoder state is created per epoch and reused by every batch.
    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    batches = train_dataset.take(steps_per_epoch)
    for batch, (inp, targ) in enumerate(batches):
        batch_loss = train_step(
            inp, targ, enc_hidden, BATCH_SIZE, encoder, decoder
        )
        total_loss += batch_loss

    report = "Epoch {} Loss {:.4f} taken time  {:.2f} sec"
    print(
        report.format(
            epoch + 1, total_loss / steps_per_epoch, time.time() - start
        )
    )

Epoch 1 Loss 0.8033 taken time  31.49 sec
Epoch 2 Loss 0.6510 taken time  20.96 sec
Epoch 3 Loss 0.5901 taken time  20.89 sec
Epoch 4 Loss 0.5402 taken time  22.13 sec
Epoch 5 Loss 0.4794 taken time  21.52 sec
Epoch 6 Loss 0.4234 taken time  22.14 sec
Epoch 7 Loss 0.3720 taken time  22.09 sec
Epoch 8 Loss 0.3217 taken time  21.15 sec
Epoch 9 Loss 0.2792 taken time  21.32 sec
Epoch 10 Loss 0.2390 taken time  21.46 sec
Epoch 11 Loss 0.1999 taken time  21.26 sec
Epoch 12 Loss 0.1673 taken time  21.69 sec
Epoch 13 Loss 0.1405 taken time  21.28 sec
Epoch 14 Loss 0.1179 taken time  21.12 sec
Epoch 15 Loss 0.1001 taken time  21.09 sec
Epoch 16 Loss 0.0883 taken time  21.13 sec
Epoch 17 Loss 0.0794 taken time  21.14 sec
Epoch 18 Loss 0.0685 taken time  21.34 sec
Epoch 19 Loss 0.0627 taken time  22.25 sec
Epoch 20 Loss 0.0620 taken time  20.43 sec
Epoch 21 Loss 0.0640 taken time  20.49 sec
Epoch 22 Loss 0.0591 taken time  20.85 sec
Epoch 23 Loss 0.0603 taken time  20.31 sec
Epoch 24 Loss 0.0585 taken time  20.19 sec
Epoch 25 Loss 0.0739 taken time  20.15 sec
Epoch 26 Loss 0.0830 taken time  20.26 sec
Epoch 27 Loss 0.1329 taken time  20.11 sec
Epoch 28 Loss 0.1469 taken time  20.13 sec
Epoch 29 Loss 0.1743 taken time  20.14 sec
Epoch 30 Loss 0.1684 taken time  20.07 sec
Epoch 31 Loss 0.1461 taken time  20.08 sec
Epoch 32 Loss 0.2911 taken time  20.07 sec
Epoch 33 Loss 0.4087 taken time  20.01 sec
Epoch 34 Loss 0.3721 taken time  20.01 sec
Epoch 35 Loss 0.6717 taken time  20.22 sec
Epoch 36 Loss 0.7545 taken time  20.01 sec
Epoch 37 Loss 1.0137 taken time  20.00 sec
Epoch 38 Loss 0.6168 taken time  20.13 sec
Epoch 39 Loss 0.5238 taken time  20.07 sec
Epoch 40 Loss 0.4743 taken time  20.08 sec
Epoch 41 Loss 0.4302 taken time  20.04 sec
Epoch 42 Loss 0.3988 taken time  20.01 sec
Epoch 43 Loss 0.3821 taken time  20.13 sec
Epoch 44 Loss 0.3523 taken time  20.01 sec
Epoch 45 Loss 0.3216 taken time  20.04 sec
Epoch 46 Loss 0.3021 taken time  20.02 sec
Epoch 47 Loss 0.2868 taken time  20.00 sec
Epoch 48 Loss 0.2964 taken time  20.04 sec
Epoch 49 Loss 0.3068 taken time  20.02 sec
Epoch 50 Loss 0.2626 taken time  20.04 sec


def translate(sentence):
    """Translate ``sentence`` with the trained model and print the outcome.

    The sentence is cleaned with ``dataset_creator.preprocess_sentence`` and
    decoded by ``evaluate_sentence`` using the module-level encoder, decoder
    and tokenizers. Three things are printed: the raw decoder output, the
    original input, and the output converted back to text. Nothing is
    returned.
    """
    cleaned = dataset_creator.preprocess_sentence(sentence)
    token_ids = evaluate_sentence(
        cleaned, inp_lang, targ_lang, encoder, decoder, max_length_input, units
    )
    print(token_ids)

    # Turn the id sequences back into words.
    words = targ_lang.sequences_to_texts(token_ids)
    print("Input: %s" % (sentence))
    print("Translation: {}".format(words))


# Demo: run one short Vietnamese sentence through the model.
translate("Tôi thích hoa.")

[[  5  41   6 647   4   3]]
Input: Tôi thích hoa.
Translation: ['i like to travel . <end>']


# Demo: run one short Vietnamese sentence through the model.
translate("Trời nắng.")

[[ 17  16  53 200   4   3]]
Input: Trời nắng.
Translation: ['it s very well . <end>']


# Demo: run one short Vietnamese sentence through the model.
translate("Anh yêu em.")

[[  15   16  322  150   39   12 1898  326    4    3]]
Input: Anh yêu em.
Translation: ['he s pretty love with a slight free . <end>']


# Demo: run one short Vietnamese sentence through the model.
translate("Tiếp tục đi.")

[[176  36 347   4   3]]
Input: Tiếp tục đi.
Translation: ['keep your eyes . <end>']

2. Translation Model

Introduction

Overview

Training Task

Data cleaning

Padding

Start and End of a Sentence

Out of Vocabulary

Attention

Decoding during Training

Decoding during Inferencing

Demo