Source code for claf.modules.attention.seq_attention

#!/usr/bin/env python3
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""
original code from: https://github.com/facebookresearch/DrQA/blob/master/drqa/reader/layers.py
"""

import torch.nn as nn
import torch.nn.functional as F


class SeqAttnMatch(nn.Module):
    """
    Given sequences X and Y, match sequence Y to each element in X.

    * o_i = sum(alpha_j * y_j) for i in X
    * alpha_j = softmax(y_j * x_i)
    """

    def __init__(self, embed_dim, identity=False):
        super(SeqAttnMatch, self).__init__()
        if not identity:
            self.linear = nn.Linear(embed_dim, embed_dim)
        else:
            self.linear = None

    def forward(self, x, y, y_mask):
        # x: batch * len1 * embed_dim
        # y: batch * len2 * embed_dim
        # y_mask: batch * len2 (1 for real tokens, 0 for padding)
        if self.linear:
            x_proj = self.linear(x.view(-1, x.size(2))).view(x.size())
            x_proj = F.relu(x_proj)
            y_proj = self.linear(y.view(-1, y.size(2))).view(y.size())
            y_proj = F.relu(y_proj)
        else:
            x_proj = x
            y_proj = y

        # Attention scores between each x_i and y_j: batch * len1 * len2
        scores = x_proj.bmm(y_proj.transpose(2, 1))

        # Mask padding positions in y before normalizing
        y_mask = y_mask.unsqueeze(1).expand(scores.size())
        scores = scores.masked_fill((y_mask == 0), -1e30)

        # Normalize over the y dimension
        alpha_flat = F.softmax(scores.view(-1, y.size(1)), dim=-1)
        alpha = alpha_flat.view(-1, x.size(1), y.size(1))

        # Weighted average of y for each position in x
        matched_seq = alpha.bmm(y)
        return matched_seq
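

# A minimal usage sketch for SeqAttnMatch, not part of the original module.
# The helper name and shapes below are hypothetical (batch=2, len1=4, len2=5,
# embed_dim=8); masks use 1 for real tokens and 0 for padding.
def _example_seq_attn_match():
    import torch

    x = torch.randn(2, 4, 8)      # batch * len1 * embed_dim
    y = torch.randn(2, 5, 8)      # batch * len2 * embed_dim
    y_mask = torch.ones(2, 5)     # every position in y is a real token
    attn = SeqAttnMatch(embed_dim=8)
    matched = attn(x, y, y_mask)  # batch * len1 * embed_dim
    return matched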


class LinearSeqAttn(nn.Module):
    """
    Self attention over a sequence:

    * o_i = softmax(Wx_i) for x_i in X.
    """

    def __init__(self, input_size):
        super(LinearSeqAttn, self).__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x, x_mask):
        # x: batch * len * input_size
        # x_mask: batch * len (1 for real tokens, 0 for padding)
        x_flat = x.contiguous().view(-1, x.size(-1))
        scores = self.linear(x_flat).view(x.size(0), x.size(1))
        scores.data.masked_fill_((x_mask == 0), -1e30)
        alpha = F.softmax(scores, dim=-1)
        return alpha
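

# A minimal usage sketch for LinearSeqAttn, not part of the original module.
# The helper name and shapes are hypothetical (batch=2, len=4, input_size=8).
def _example_linear_seq_attn():
    import torch

    x = torch.randn(2, 4, 8)    # batch * len * input_size
    x_mask = torch.ones(2, 4)   # every position is a real token
    attn = LinearSeqAttn(input_size=8)
    alpha = attn(x, x_mask)     # batch * len; each row sums to 1
    return alpha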


class BilinearSeqAttn(nn.Module):
    """
    A bilinear attention layer over a sequence X w.r.t y:

    * o_i = softmax(x_i'Wy) for x_i in X.

    Optionally don't normalize output weights.
    """

    def __init__(self, x_size, y_size, identity=False, normalize=True):
        super(BilinearSeqAttn, self).__init__()
        self.normalize = normalize
        if not identity:
            self.linear = nn.Linear(y_size, x_size)
        else:
            self.linear = None

    def forward(self, x, y, x_mask):
        # x: batch * len * x_size
        # y: batch * y_size
        # x_mask: batch * len (1 for real tokens, 0 for padding)
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        xWy.data.masked_fill_((x_mask == 0), -1e30)
        if self.normalize:
            if self.training:
                # Return log-probabilities during training
                alpha = F.log_softmax(xWy, dim=-1)
            else:
                alpha = F.softmax(xWy, dim=-1)
        else:
            # Unnormalized exponentiated scores
            alpha = xWy.exp()
        return alpha
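

# A minimal usage sketch for BilinearSeqAttn, not part of the original module.
# The helper name and shapes are hypothetical (batch=2, len=4, x_size=8,
# y_size=6); eval() is called so the forward pass returns softmax weights
# rather than the log_softmax used in training mode.
def _example_bilinear_seq_attn():
    import torch

    x = torch.randn(2, 4, 8)    # batch * len * x_size
    y = torch.randn(2, 6)       # batch * y_size
    x_mask = torch.ones(2, 4)   # every position is a real token
    attn = BilinearSeqAttn(x_size=8, y_size=6)
    attn.eval()                 # softmax weights in eval mode
    alpha = attn(x, y, x_mask)  # batch * len; each row sums to 1
    return alpha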