#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2018-19: Homework 3
parser_model.py: Feed-Forward Neural Network for Dependency Parsing
Sahil Chopra
"""
import pickle
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F


class ParserModel(nn.Module):
    """ Feedforward neural network with an embedding layer and a single hidden layer.
    The ParserModel will predict which transition should be applied to a
    given partial parse configuration.

    PyTorch Notes:
        - Note that "ParserModel" is a subclass of the "nn.Module" class. In PyTorch all neural networks
            are a subclass of this "nn.Module".
        - The "__init__" method is where you define all the layers and their respective parameters
            (embedding layers, linear layers, dropout layers, etc.).
        - "__init__" gets automatically called when you create a new instance of your class, e.g.
            when you write "m = ParserModel()".
        - Other methods of ParserModel can access variables that have the "self." prefix. Thus,
            you should add the "self." prefix to layers, values, etc. that you want to utilize
            in other ParserModel methods.
        - For further documentation on "nn.Module" please see https://pytorch.org/docs/stable/nn.html.
    """
    def __init__(self, embeddings, n_features=36,
                 hidden_size=200, n_classes=3, dropout_prob=0.5):
        """ Initialize the parser model.

        @param embeddings (Tensor): word embeddings (num_words, embedding_size)
        @param n_features (int): number of input features
        @param hidden_size (int): number of hidden units
        @param n_classes (int): number of output classes
        @param dropout_prob (float): dropout probability
        """
        super(ParserModel, self).__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        self.dropout_prob = dropout_prob
        self.embed_size = embeddings.shape[1]
        self.hidden_size = hidden_size
        self.pretrained_embeddings = nn.Embedding(embeddings.shape[0], self.embed_size)
        self.pretrained_embeddings.weight = nn.Parameter(torch.tensor(embeddings))

        ### YOUR CODE HERE (~5 Lines)
        ### TODO:
        ###     1) Construct `self.embed_to_hidden` linear layer, initializing the weight matrix
        ###         with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
        ###     2) Construct `self.dropout` layer.
        ###     3) Construct `self.hidden_to_logits` linear layer, initializing the weight matrix
        ###         with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
        ###
        ### Note: Here, we use Xavier Uniform Initialization for our weight initialization.
        ###         It has been shown empirically that this provides better initial weights
        ###         for training networks than random uniform initialization.
        ###         For more details check out this great blog post:
        ###             http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
        ### Hints:
        ###     - After you create a linear layer you can access the weight
        ###       matrix via:
        ###         linear_layer.weight
        ###
        ### Please see the following docs for support:
        ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Xavier Init: https://pytorch.org/docs/stable/nn.html#torch.nn.init.xavier_uniform_
        ###     Dropout: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        self.embed_to_hidden = nn.Linear(self.n_features * self.embed_size, self.hidden_size)
        nn.init.xavier_uniform_(self.embed_to_hidden.weight, gain=1)
        self.dropout = nn.Dropout(dropout_prob)
        self.hidden_to_logits = nn.Linear(self.hidden_size, self.n_classes)
        nn.init.xavier_uniform_(self.hidden_to_logits.weight, gain=1)

        ### END YOUR CODE

    def embedding_lookup(self, t):
        """ Utilize `self.pretrained_embeddings` to map input `t` from input tokens (integers)
            to embedding vectors.

            PyTorch Notes:
                - `self.pretrained_embeddings` is a torch.nn.Embedding object that we defined in __init__.
                - Here `t` is a tensor where each row represents a list of features. Each feature is
                    represented by an integer (input token).
                - In PyTorch the Embedding object, e.g. `self.pretrained_embeddings`, allows you to
                    go from an index to an embedding. Please see the documentation
                    (https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding)
                    to learn how to use `self.pretrained_embeddings` to extract the embeddings for your tensor `t`.

            @param t (Tensor): input tensor of tokens (batch_size, n_features)

            @return x (Tensor): tensor of embeddings for words represented in t
                                (batch_size, n_features * embed_size)
        """
        ### YOUR CODE HERE (~1-3 Lines)
        ### TODO:
        ###     1) Use `self.pretrained_embeddings` to look up the embeddings for the input tokens in `t`.
        ###     2) After you apply the embedding lookup, you will have a tensor of shape (batch_size, n_features, embedding_size).
        ###         Use the tensor `view` method to reshape the embeddings tensor to (batch_size, n_features * embedding_size).
        ###
        ### Note: In order to get batch_size, you may need to use the tensor .size() function:
        ###         https://pytorch.org/docs/stable/tensors.html#torch.Tensor.size
        ###
        ### Please see the following docs for support:
        ###     Embedding Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding
        ###     View: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view

        batch_size, n_features = t.size()
        embedded = self.pretrained_embeddings(t)                       # (batch_size, n_features, embed_size)
        x = embedded.view(batch_size, n_features * self.embed_size)    # (batch_size, n_features * embed_size)

        ### END YOUR CODE
        return x

    def forward(self, t):
        """ Run the model forward.

            Note that we will not apply the softmax function here because
            it is included in the loss function nn.CrossEntropyLoss.

            PyTorch Notes:
                - Every nn.Module object (PyTorch model) has a `forward` function.
                - When you apply your nn.Module to an input tensor `t` this function is applied to the tensor.
                    For example, if you created an instance of your ParserModel and applied it to some `t` as follows,
                    the `forward` function would be called on `t` and the result would be stored in the `output` variable:

                        model = ParserModel()
                        output = model(t)  # this calls the forward function
                - For more details check out: https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward

            @param t (Tensor): input tensor of tokens (batch_size, n_features)

            @return logits (Tensor): tensor of predictions (output after applying the layers of the network)
                                     without applying softmax (batch_size, n_classes)
        """
        ### YOUR CODE HERE (~3-5 lines)
        ### TODO:
        ###     1) Apply `self.embedding_lookup` to `t` to get the embeddings.
        ###     2) Apply `embed_to_hidden` linear layer to the embeddings.
        ###     3) Apply relu non-linearity to the output of step 2 to get the hidden units.
        ###     4) Apply dropout layer to the output of step 3.
        ###     5) Apply `hidden_to_logits` layer to the output of step 4 to get the logits.
        ###
        ### Note: We do not apply the softmax to the logits here, because
        ###     the loss function (torch.nn.CrossEntropyLoss) applies it more efficiently.
        ###
        ### Please see the following docs for support:
        ###     ReLU: https://pytorch.org/docs/stable/nn.html?highlight=relu#torch.nn.functional.relu

        embeddings = self.embedding_lookup(t)          # (batch_size, n_features * embed_size)
        hidden = self.embed_to_hidden(embeddings)      # (batch_size, hidden_size)
        hidden_relu = F.relu(hidden)
        dropped_out = self.dropout(hidden_relu)
        logits = self.hidden_to_logits(dropped_out)    # (batch_size, n_classes)

        ### END YOUR CODE
        return logits
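

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original assignment scaffold).
# A minimal smoke test showing how the model is typically instantiated and
# called, assuming random embeddings stand in for the pretrained ones and the
# default hyperparameters above (n_features=36, hidden_size=200, n_classes=3).
# The vocabulary size (1000), embedding size (50), and batch size (4) are
# arbitrary illustrative values.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    embeddings = np.random.randn(1000, 50).astype(np.float32)   # (num_words, embed_size)
    model = ParserModel(embeddings)

    t = torch.randint(0, 1000, (4, 36), dtype=torch.long)       # (batch_size, n_features)
    logits = model(t)                                            # calls forward()
    assert logits.shape == (4, 3), f"unexpected logits shape: {logits.shape}"
    print("forward pass OK, logits shape:", tuple(logits.shape))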