#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2018-19: Homework 3
parser_transitions.py: Algorithms for completing partial parses.
Sahil Chopra
"""

import sys


class PartialParse(object):
    """State of a transition-based dependency parse of a single sentence.

    Attributes:
        sentence: The sentence being parsed (list of str). Never modified.
        stack: The current stack; the top of the stack is the last element.
        buffer: The words not yet shifted; the next word is the first element.
        dependencies: List of (head, dependent) tuples produced so far.
    """

    def __init__(self, sentence):
        """Initializes this partial parse.

        @param sentence (list of str): The sentence to be parsed as a list of words.
                                        It is kept for bookkeeping and never modified.
        """
        self.sentence = sentence
        # The root token is represented with the string "ROOT".
        self.stack = ['ROOT']
        # Copy the sentence so that the buffer never aliases the caller's list.
        self.buffer = list(sentence)
        self.dependencies = []

    def is_complete(self):
        """Returns True when the buffer is empty and the stack holds one item."""
        return len(self.buffer) == 0 and len(self.stack) == 1

    def parse_step(self, transition):
        """Performs a single parse step by applying the given transition to this partial parse

        @param transition (str): A string that equals "S", "LA", or "RA" representing the shift,
                                 left-arc, and right-arc transitions. You can assume the provided
                                 transition is a legal transition. Any value other than "S" or
                                 "LA" is treated as a right-arc.

        Calling this on an already completed parse is a no-op.
        """
        if self.is_complete():
            return

        if transition == 'S':
            # Shift: move the first buffer word onto the stack. The buffer is
            # rebound to a new list rather than mutated in place.
            self.stack.append(self.buffer[0])
            self.buffer = self.buffer[1:]
        elif transition == 'LA':
            # Left-arc: the top of the stack becomes the head of the second
            # item, which is removed from the stack.
            dependent = self.stack.pop(-2)
            self.dependencies.append((self.stack[-1], dependent))
        else:
            # Right-arc: the second item becomes the head of the top of the
            # stack, which is removed. The head is read first so that an
            # illegal transition fails before the stack is touched.
            head = self.stack[-2]
            dependent = self.stack.pop()
            self.dependencies.append((head, dependent))

    def parse(self, transitions):
        """Applies the provided transitions to this PartialParse

        @param transitions (list of str): The list of transitions in the order they should be applied

        @return dependencies (list of string tuples): The list of dependencies produced when
                                                      parsing the sentence. Represented as a list of
                                                      tuples where each tuple is of the form
                                                      (head, dependent).
        """
        for transition in transitions:
            self.parse_step(transition)
        return self.dependencies


def minibatch_parse(sentences, model, batch_size):
    """Parses a list of sentences in minibatches using a model.

    @param sentences (list of list of str): A list of sentences to be parsed
                                            (each sentence is a list of words and each word is of type string)
    @param model (ParserModel): The model that makes parsing decisions. It is assumed to have a function
                                model.predict(partial_parses) that takes in a list of PartialParses as input and
                                returns a list of transitions predicted for each parse. That is, after calling
                                    transitions = model.predict(partial_parses)
                                transitions[i] will be the next transition to apply to partial_parses[i].
    @param batch_size (int): The number of PartialParses to include in each minibatch

    @return dependencies (list of dependency lists): A list where each element is the dependencies
                                                     list for a parsed sentence. Ordering should be the
                                                     same as in sentences (i.e., dependencies[i] should
                                                     contain the parse for sentences[i]).

    @raises ValueError: If batch_size is smaller than 1 (the loop could never make progress).
    """
    if batch_size < 1:
        raise ValueError(
            "batch_size must be a positive integer, got {!r}".format(batch_size))

    partial_parses = [PartialParse(sentence) for sentence in sentences]

    # `unfinished_parses` holds references to the same PartialParse objects as
    # `partial_parses`. Parses that are complete from the start (empty
    # sentences) are left out so that the model is never asked to predict a
    # transition for them.
    unfinished_parses = [pp for pp in partial_parses if not pp.is_complete()]

    while unfinished_parses:
        minibatch = unfinished_parses[:batch_size]
        transitions = model.predict(minibatch)

        # Indexing (rather than zip) keeps a too-short prediction list a loud
        # IndexError instead of a silently skipped parse.
        for position, partial_parse in enumerate(minibatch):
            partial_parse.parse_step(transitions[position])

        # Drop the parses of this minibatch that are now complete; the parses
        # beyond the minibatch were not touched and stay as they are.
        still_open = [pp for pp in minibatch if not pp.is_complete()]
        unfinished_parses = still_open + unfinished_parses[len(minibatch):]

    return [pp.dependencies for pp in partial_parses]


def test_step(name, transition, stack, buf, deps,
              ex_stack, ex_buf, ex_deps):
    """Tests that a single parse step returns the expected output"""
    pp = PartialParse([])
    pp.stack, pp.buffer, pp.dependencies = stack, buf, deps

    pp.parse_step(transition)
    stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))
    assert stack == ex_stack, \
        "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)
    assert buf == ex_buf, \
        "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)
    assert deps == ex_deps, \
        "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
    print("{:} test passed!".format(name))


# Parameterised helper, not a test case: keep pytest from collecting it (its
# parameters are not fixtures) if this module's names end up in a test module.
test_step.__test__ = False


def test_parse_step():
    """Simple tests for the PartialParse.parse_step function
    Warning: these are not exhaustive
    """
    test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
              ("ROOT", "the", "cat"), ("sat",), ())
    test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
              ("ROOT", "cat",), ("sat",), (("cat", "the"),))
    test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
              ("ROOT", "run",), (), (("run", "fast"),))


def test_parse():
    """Simple tests for the PartialParse.parse function
    Warning: these are not exhaustive
    """
    sentence = ["parse", "this", "sentence"]
    dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])
    dependencies = tuple(sorted(dependencies))
    expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))
    assert dependencies == expected, \
        "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
    assert tuple(sentence) == ("parse", "this", "sentence"), \
        "parse test failed: the input sentence should not be modified"
    print("parse test passed!")


class DummyModel(object):
    """Dummy model for testing the minibatch_parse function
    First shifts everything onto the stack and then does exclusively right arcs if the first word of
    the sentence is "right", "left" if otherwise.
    """

    def predict(self, partial_parses):
        """Returns one transition per partial parse.

        "S" while the buffer is non-empty; afterwards "RA" when the word
        directly above the bottom of the stack equals "right", else "LA".
        """
        # Compare strings by value: `is` only tests object identity and merely
        # happens to work for interned literals.
        return [("RA" if pp.stack[1] == "right" else "LA") if len(pp.buffer) == 0 else "S"
                for pp in partial_parses]


def test_dependencies(name, deps, ex_deps):
    """Tests the provided dependencies match the expected dependencies"""
    deps = tuple(sorted(deps))
    assert deps == ex_deps, \
        "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)


# Parameterised helper, not a test case (see the note on test_step).
test_dependencies.__test__ = False


def test_minibatch_parse():
    """Simple tests for the minibatch_parse function
    Warning: these are not exhaustive
    """
    sentences = [["right", "arcs", "only"],
                 ["right", "arcs", "only", "again"],
                 ["left", "arcs", "only"],
                 ["left", "arcs", "only", "again"]]
    deps = minibatch_parse(sentences, DummyModel(), 2)
    test_dependencies("minibatch_parse", deps[0],
                      (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))
    test_dependencies("minibatch_parse", deps[1],
                      (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))
    test_dependencies("minibatch_parse", deps[2],
                      (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))
    test_dependencies("minibatch_parse", deps[3],
                      (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))
    print("minibatch_parse test passed!")


if __name__ == '__main__':
    args = sys.argv
    usage_error = "You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script"
    if len(args) != 2:
        raise Exception(usage_error)
    elif args[1] == "part_c":
        test_parse_step()
        test_parse()
    elif args[1] == "part_d":
        test_minibatch_parse()
    else:
        raise Exception(usage_error)