import math
import numpy as np
import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
"""
Positional Encoding
in "Attention is All You Need" (https://arxiv.org/abs/1706.03762)
The use of relative position is possible because sin(x+y) and cos(x+y) can be
expressed in terms of y, sin(x) and cos(x).
(cf. https://github.com/tensorflow/tensor2tensor/blob/42c3f377f441e5a0f431127d63e71414ead291c4/\
tensor2tensor/layers/common_attention.py#L388)
* Args:
embed_dim: the number of embedding dimension
* Kwargs:
max_len: the number of maximum sequence length
"""

    def __init__(self, embed_dim, max_length=2000):
        super(PositionalEncoding, self).__init__()

        signal_sinusoid = self._get_timing_signal(max_length, embed_dim)
        self.register_buffer("position_encoding", signal_sinusoid)

    def _get_timing_signal(self, length, channels, min_timescale=1.0, max_timescale=1.0e4):
        position = np.arange(length)
        num_timescales = channels // 2

        # Timescales form a geometric progression from min_timescale to max_timescale.
        log_timescale_increment = math.log(
            float(max_timescale) / float(min_timescale) / (float(num_timescales) - 1)
        )
        inv_timescales = min_timescale * np.exp(
            np.arange(num_timescales).astype(np.float64) * -log_timescale_increment
        )

        # (length, num_timescales) grid of position * inverse timescale.
        scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, 0)

        # Concatenate the sin and cos halves; pad one zero column if channels is odd.
        signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
        signal = np.pad(signal, [[0, 0], [0, channels % 2]], "constant", constant_values=[0.0, 0.0])
        signal = signal.reshape([1, length, channels])

        return torch.from_numpy(signal).type(torch.FloatTensor)

    def forward(self, x):
        # Add the (broadcast) positional encoding for the first x.size(1) positions.
        x = x + self.position_encoding[:, : x.size(1)]
        return x
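

# A minimal usage sketch (illustrative, not part of the module above): the
# embedding size, batch size, and sequence length below are assumptions made
# only to show the expected tensor shapes.
if __name__ == "__main__":
    embed_dim = 512
    pos_enc = PositionalEncoding(embed_dim, max_length=2000)

    # Stand-in for token embeddings of shape (batch, seq_len, embed_dim),
    # e.g. the output of an nn.Embedding layer.
    token_embeddings = torch.zeros(2, 10, embed_dim)

    out = pos_enc(token_embeddings)
    print(out.shape)  # torch.Size([2, 10, 512])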