GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a3/parser_model.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2021-2022: Homework 3
parser_model.py: Feed-Forward Neural Network for Dependency Parsing
Sahil Chopra <[email protected]>
Haoshen Hong <[email protected]>
"""
import argparse
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

class ParserModel(nn.Module):
    """ Feedforward neural network with an embedding layer and two hidden layers.
    The ParserModel will predict which transition should be applied to a
    given partial parse configuration.

    PyTorch Notes:
        - Note that "ParserModel" is a subclass of the "nn.Module" class. In PyTorch all neural networks
            are subclasses of this "nn.Module".
        - The "__init__" method is where you define all the layers and parameters
            (embedding layers, linear layers, dropout layers, etc.).
        - "__init__" gets automatically called when you create a new instance of your class, e.g.
            when you write "m = ParserModel()".
        - Other methods of ParserModel can access variables that have the "self." prefix. Thus,
            you should add the "self." prefix to layers, values, etc. that you want to utilize
            in other ParserModel methods.
        - For further documentation on "nn.Module" please see https://pytorch.org/docs/stable/nn.html.
    """
    def __init__(self, embeddings, n_features=36,
                 hidden_size=200, n_classes=3, dropout_prob=0.5):
        """ Initialize the parser model.

        @param embeddings (ndarray): word embeddings (num_words, embedding_size)
        @param n_features (int): number of input features
        @param hidden_size (int): number of hidden units
        @param n_classes (int): number of output classes
        @param dropout_prob (float): dropout probability
        """
        super(ParserModel, self).__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        self.dropout_prob = dropout_prob
        self.embed_size = embeddings.shape[1]
        self.hidden_size = hidden_size
        self.embeddings = nn.Parameter(torch.tensor(embeddings))

        ### YOUR CODE HERE (~9-10 Lines)
        ### TODO:
        ###     1) Declare `self.embed_to_hidden_weight` and `self.embed_to_hidden_bias` as `nn.Parameter`.
        ###        Initialize weight with the `nn.init.xavier_uniform_` function and bias with `nn.init.uniform_`
        ###        with default parameters.
        ###     2) Construct `self.dropout` layer.
        ###     3) Declare `self.hidden_to_logits_weight` and `self.hidden_to_logits_bias` as `nn.Parameter`.
        ###        Initialize weight with the `nn.init.xavier_uniform_` function and bias with `nn.init.uniform_`
        ###        with default parameters.
        ###
        ### Note: Trainable variables are declared as `nn.Parameter`, a commonly used API for
        ###       including a tensor in the computational graph so that it is updated w.r.t. its gradient.
        ###       Here, we use Xavier Uniform Initialization for our weight initialization.
        ###       It has been shown empirically that this provides better initial weights
        ###       for training networks than random uniform initialization.
        ###       For more details check out this great blog post:
        ###       http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
        ###
        ### Please see the following docs for support:
        ###     nn.Parameter: https://pytorch.org/docs/stable/nn.html#parameters
        ###     Initialization: https://pytorch.org/docs/stable/nn.init.html
        ###     Dropout: https://pytorch.org/docs/stable/nn.html#dropout-layers
        ###
        ### See the PDF for hints.
        self.embed_to_hidden_weight = nn.Parameter(torch.empty(self.embed_size * self.n_features, self.hidden_size))
        self.embed_to_hidden_bias = nn.Parameter(torch.empty(self.hidden_size))
        nn.init.xavier_uniform_(self.embed_to_hidden_weight)
        nn.init.uniform_(self.embed_to_hidden_bias)
        self.dropout = nn.Dropout(p=self.dropout_prob)
        self.hidden_to_logits_weight = nn.Parameter(torch.empty(self.hidden_size, self.n_classes))
        self.hidden_to_logits_bias = nn.Parameter(torch.empty(self.n_classes))
        nn.init.xavier_uniform_(self.hidden_to_logits_weight)
        nn.init.uniform_(self.hidden_to_logits_bias)
        ### END YOUR CODE
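        # Note on the initialization above (background, not required by the skeleton): with its
        # default gain of 1.0, nn.init.xavier_uniform_ fills a 2-D weight with samples from
        # U(-a, a), where a = sqrt(6 / (fan_in + fan_out)) and fan_in/fan_out are the two
        # dimensions of the tensor. A hypothetical alternative sketch would wrap each affine
        # map in an nn.Linear module instead of raw nn.Parameters, e.g.:
        #     self.embed_to_hidden = nn.Linear(self.n_features * self.embed_size, self.hidden_size)
        #     nn.init.xavier_uniform_(self.embed_to_hidden.weight)
        # The explicit nn.Parameter version is kept here because the TODO above asks for it.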

    def embedding_lookup(self, w):
        """ Utilize `w` to select embeddings from embedding matrix `self.embeddings`
        @param w (Tensor): input tensor of word indices (batch_size, n_features)

        @return x (Tensor): tensor of embeddings for words represented in w
                            (batch_size, n_features * embed_size)
        """

        ### YOUR CODE HERE (~1-4 Lines)
        ### TODO:
        ###     1) For each index `i` in `w`, select the `i`th vector from self.embeddings
        ###     2) Reshape the tensor using the `view` function if necessary
        ###
        ### Note: All embedding vectors are stacked and stored as a matrix. The model receives
        ###       a list of indices representing a sequence of words, then it calls this lookup
        ###       function to map the indices to a sequence of embeddings.
        ###
        ###       This problem aims to test your understanding of embedding lookup,
        ###       so DO NOT use any high level API like nn.Embedding
        ###       (we are asking you to implement that!). Pay attention to tensor shapes
        ###       and reshape if necessary. Make sure you know each tensor's shape before you run the code!
        ###
        ### PyTorch has some useful APIs for you, and you can use either one
        ### in this problem (except nn.Embedding). These docs might be helpful:
        ###     Index select: https://pytorch.org/docs/stable/torch.html#torch.index_select
        ###     Gather: https://pytorch.org/docs/stable/torch.html#torch.gather
        ###     View: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view
        ###     Flatten: https://pytorch.org/docs/stable/generated/torch.flatten.html
        N = w.shape[0]
        # Flatten the (N, n_features) index tensor, look up each row of self.embeddings,
        # then reshape back to (N, n_features * embed_size).
        x = torch.index_select(self.embeddings, 0, torch.flatten(w)).view(N, -1)
        ### END YOUR CODE
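        # Alternative sketch (not the graded index_select solution): plain advanced indexing
        # gives the same result, since indexing self.embeddings with the (batch_size, n_features)
        # index tensor yields a (batch_size, n_features, embed_size) tensor of vectors:
        #     x = self.embeddings[w].view(w.shape[0], -1)
        # Either way, nn.Embedding is avoided, as the instructions above require.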
        return x


    def forward(self, w):
        """ Run the model forward.

            Note that we will not apply the softmax function here because it is included in the loss function nn.CrossEntropyLoss.

            PyTorch Notes:
                - Every nn.Module object (PyTorch model) has a `forward` function.
                - When you apply your nn.Module to an input tensor `w` this function is applied to the tensor.
                    For example, if you created an instance of your ParserModel and applied it to some `w` as follows,
                    the `forward` function would be called on `w` and the result would be stored in the `output` variable:
                        model = ParserModel()
                        output = model(w) # this calls the forward function
                - For more details check out: https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward

        @param w (Tensor): input tensor of tokens (batch_size, n_features)

        @return logits (Tensor): tensor of predictions (output after applying the layers of the network)
                                 without applying softmax (batch_size, n_classes)
        """
        ### YOUR CODE HERE (~3-5 lines)
        ### TODO:
        ###     Complete the forward computation as described in the write-up. In addition, include a dropout layer
        ###     as declared in `__init__` after the ReLU function.
        ###
        ### Note: We do not apply the softmax to the logits here, because
        ###       the loss function (torch.nn.CrossEntropyLoss) applies it more efficiently.
        ###
        ### Please see the following docs for support:
        ###     Matrix product: https://pytorch.org/docs/stable/torch.html#torch.matmul
        ###     ReLU: https://pytorch.org/docs/stable/nn.html?highlight=relu#torch.nn.functional.relu
        x = self.embedding_lookup(w)  # (batch_size, n_features * embed_size)
        h = F.relu(x.mm(self.embed_to_hidden_weight) + self.embed_to_hidden_bias)  # (batch_size, hidden_size)
        h = self.dropout(h)  # dropout after the ReLU, as declared in `__init__`
        logits = h.mm(self.hidden_to_logits_weight) + self.hidden_to_logits_bias  # (batch_size, n_classes)
        ### END YOUR CODE
        return logits
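    # Usage sketch (illustrative only; `w_batch` and `gold_transitions` are hypothetical tensors
    # of shape (batch_size, n_features) and (batch_size,)): because `forward` returns
    # unnormalized logits, training code would typically pair the model with
    # nn.CrossEntropyLoss, which applies log-softmax internally:
    #     loss_fn = nn.CrossEntropyLoss()
    #     loss = loss_fn(model(w_batch), gold_transitions)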


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Simple sanity check for parser_model.py')
    parser.add_argument('-e', '--embedding', action='store_true', help='sanity check for embedding_lookup function')
    parser.add_argument('-f', '--forward', action='store_true', help='sanity check for forward function')
    args = parser.parse_args()

    embeddings = np.zeros((100, 30), dtype=np.float32)
    model = ParserModel(embeddings)

    def check_embedding():
        inds = torch.randint(0, 100, (4, 36), dtype=torch.long)
        selected = model.embedding_lookup(inds)
        assert np.all(selected.data.numpy() == 0), "The result of embedding lookup: " \
                                                   + repr(selected) + " contains non-zero elements."

    def check_forward():
        inputs = torch.randint(0, 100, (4, 36), dtype=torch.long)
        out = model(inputs)
        expected_out_shape = (4, 3)
        assert out.shape == expected_out_shape, "The result shape of forward is: " + repr(out.shape) + \
                                                " which doesn't match expected " + repr(expected_out_shape)

    if args.embedding:
        check_embedding()
        print("Embedding_lookup sanity check passes!")

    if args.forward:
        check_forward()
        print("Forward sanity check passes!")