Source code for claf.modules.encoder.positional


import math
import numpy as np
import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """
    Positional Encoding in "Attention is All You Need"
    (https://arxiv.org/abs/1706.03762)

    The use of relative position is possible because sin(x+y) and cos(x+y)
    can be expressed in terms of y, sin(x) and cos(x).
    (cf. https://github.com/tensorflow/tensor2tensor/blob/42c3f377f441e5a0f431127d63e71414ead291c4/tensor2tensor/layers/common_attention.py#L388)

    * Args:
        embed_dim: the number of embedding dimensions

    * Kwargs:
        max_length: the maximum sequence length
    """

    def __init__(self, embed_dim, max_length=2000):
        super(PositionalEncoding, self).__init__()

        signal_sinusoid = self._get_timing_signal(max_length, embed_dim)
        self.register_buffer("position_encoding", signal_sinusoid)

    def _get_timing_signal(self, length, channels, min_timescale=1.0, max_timescale=1.0e4):
        position = np.arange(length)
        num_timescales = channels // 2
        log_timescale_increment = math.log(
            float(max_timescale) / float(min_timescale) / (float(num_timescales) - 1)
        )
        inv_timescales = min_timescale * np.exp(
            np.arange(num_timescales).astype(np.float64) * -log_timescale_increment
        )

        scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, 0)
        signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
        # Pad with one zero column if the number of channels is odd.
        signal = np.pad(signal, [[0, 0], [0, channels % 2]], "constant", constant_values=0.0)
        signal = signal.reshape([1, length, channels])
        return torch.from_numpy(signal).type(torch.FloatTensor)
    def forward(self, x):
        # Add the fixed sinusoidal signal, truncated to the input sequence
        # length, to the input embeddings.
        x = x + self.position_encoding[:, : x.size(1)]
        return x
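

A minimal usage sketch (not part of the CLAF source): assuming token embeddings of shape (batch_size, seq_len, embed_dim), the buffer of shape (1, max_length, embed_dim) is sliced to the input length and broadcast over the batch dimension. Variable names below are illustrative.

# Usage sketch; names and sizes are illustrative assumptions.
embed_dim, batch_size, seq_len = 128, 4, 50

pos_enc = PositionalEncoding(embed_dim, max_length=2000)
token_embeddings = torch.randn(batch_size, seq_len, embed_dim)  # e.g. output of nn.Embedding

encoded = pos_enc(token_embeddings)  # same shape: (batch_size, seq_len, embed_dim)
assert encoded.shape == (batch_size, seq_len, embed_dim)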