235 lines
10 KiB
Python
235 lines
10 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
|
||
|
|
CS224N 2018-19: Homework 3
|
||
|
|
parser_transitions.py: Algorithms for completing partial parses.
|
||
|
|
Sahil Chopra <schopra8@stanford.edu>
|
||
|
|
"""
|
||
|
|
|
||
|
|
import sys
|
||
|
|
|
||
|
|
class PartialParse(object):
    """State of a transition-based dependency parse of one sentence.

    The state consists of a stack, a buffer and the dependencies produced so
    far; transitions ("S", "LA", "RA") move it towards a finished parse.
    """

    def __init__(self, sentence):
        """Initializes this partial parse.

        @param sentence (list of str): The sentence to be parsed as a list of words.
                                        It is stored as given and never modified.
        """
        # The sentence being parsed is kept for bookkeeping purposes. Do not alter it.
        self.sentence = sentence

        # The current stack, with the top of the stack as the last element.
        # The root token is represented with the string "ROOT".
        self.stack = ['ROOT']
        # The current buffer, with the first item of the buffer as the first
        # element. It is a copy, so that in-place changes to the buffer can
        # never leak into `sentence`.
        self.buffer = list(sentence)
        # Dependencies produced so far, as (head, dependent) tuples.
        # Order of this list doesn't matter.
        self.dependencies = []

    def parse_step(self, transition):
        """Performs a single parse step by applying the given transition to this partial parse

        Nothing happens when the parse is already finished, i.e. the buffer is
        empty and only one item is left on the stack.

        @param transition (str): A string that equals "S", "LA", or "RA" representing the shift,
                                left-arc, and right-arc transitions. You can assume the provided
                                transition is a legal transition. Any value other than "S" or
                                "LA" is handled as a right-arc.
        """
        # A finished parse has nothing left to shift or attach.
        if len(self.buffer) == 0 and len(self.stack) == 1:
            return

        if transition == 'S':
            # Shift: move the first buffer item onto the stack. The buffer is
            # rebound to a new list rather than mutated, so a list assigned to
            # `self.buffer` from outside is left untouched.
            self.stack.append(self.buffer[0])
            self.buffer = self.buffer[1:]
        elif transition == 'LA':
            # Left-arc: the top of the stack becomes the head of the second
            # item, which is then removed from the stack.
            self.dependencies.append((self.stack[-1], self.stack[-2]))
            del self.stack[-2]
        else:  # RA
            # Right-arc: the second item becomes the head of the top of the
            # stack, which is then removed from the stack.
            self.dependencies.append((self.stack[-2], self.stack[-1]))
            del self.stack[-1]

    def parse(self, transitions):
        """Applies the provided transitions to this PartialParse

        @param transitions (list of str): The list of transitions in the order they should be applied

        @return dependencies (list of string tuples): The list of dependencies produced when
                                                        parsing the sentence. Represented as a list of
                                                        tuples where each tuple is of the form (head, dependent).
        """
        for transition in transitions:
            self.parse_step(transition)
        return self.dependencies
|
||
|
|
|
||
|
|
|
||
|
|
def minibatch_parse(sentences, model, batch_size):
    """Parses a list of sentences in minibatches using a model.

    @param sentences (list of list of str): A list of sentences to be parsed
                                            (each sentence is a list of words and each word is of type string)
    @param model (ParserModel): The model that makes parsing decisions. It is assumed to have a function
                                model.predict(partial_parses) that takes in a list of PartialParses as input and
                                returns a list of transitions predicted for each parse. That is, after calling
                                    transitions = model.predict(partial_parses)
                                transitions[i] will be the next transition to apply to partial_parses[i].
    @param batch_size (int): The number of PartialParses to include in each minibatch.
                             Must be at least 1.

    @return dependencies (list of dependency lists): A list where each element is the dependencies
                                                    list for a parsed sentence. Ordering should be the
                                                    same as in sentences (i.e., dependencies[i] should
                                                    contain the parse for sentences[i]).
    @raise ValueError: If batch_size is smaller than 1.
    """
    # A batch size below 1 yields an empty (or never shrinking) minibatch, so
    # the loop below would never terminate.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got {:}".format(batch_size))

    def is_finished(partial_parse):
        # A parse is complete once its buffer is empty and only one item
        # remains on its stack.
        return len(partial_parse.buffer) == 0 and len(partial_parse.stack) == 1

    partial_parses = [PartialParse(sentence) for sentence in sentences]
    # Both lists refer to the same PartialParse objects. Parses that are
    # already complete (empty sentences) are left out so the model is never
    # asked to predict a transition for them.
    unfinished_parses = [pp for pp in partial_parses if not is_finished(pp)]

    while len(unfinished_parses) != 0:
        minibatch = unfinished_parses[:batch_size]
        transitions = model.predict(minibatch)

        # Indexing (rather than zip) keeps a too-short prediction list an
        # immediate IndexError instead of silently skipping parses.
        for index, partial_parse in enumerate(minibatch):
            partial_parse.parse_step(transitions[index])

        # Drop the parses this step completed; the remaining ones keep their
        # relative order in front of the not-yet-visited parses.
        still_open = [pp for pp in minibatch if not is_finished(pp)]
        unfinished_parses = still_open + unfinished_parses[batch_size:]

    return [pp.dependencies for pp in partial_parses]
|
||
|
|
|
||
|
|
|
||
|
|
def test_step(name, transition, stack, buf, deps,
              ex_stack, ex_buf, ex_deps):
    """Apply one transition to a hand-built parse state and verify the result.

    A PartialParse is created for an empty sentence and its stack, buffer and
    dependencies are overwritten with `stack`, `buf` and `deps`. After
    `transition` is applied, the three fields are compared (as tuples, with
    the dependencies sorted) against `ex_stack`, `ex_buf` and `ex_deps`.
    `name` only labels the printed and asserted messages.
    """
    parse = PartialParse([])
    parse.stack = stack
    parse.buffer = buf
    parse.dependencies = deps

    parse.parse_step(transition)

    # Snapshot the resulting state in a form comparable to the expected tuples.
    got_stack = tuple(parse.stack)
    got_buf = tuple(parse.buffer)
    got_deps = tuple(sorted(parse.dependencies))

    assert got_stack == ex_stack, \
        "{:} test resulted in stack {:}, expected {:}".format(name, got_stack, ex_stack)
    assert got_buf == ex_buf, \
        "{:} test resulted in buffer {:}, expected {:}".format(name, got_buf, ex_buf)
    assert got_deps == ex_deps, \
        "{:} test resulted in dependency list {:}, expected {:}".format(name, got_deps, ex_deps)
    print("{:} test passed!".format(name))
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse_step():
    """Run one check per transition type through test_step.

    Warning: these are not exhaustive
    """
    # Each row: name, transition, stack, buffer, dependencies,
    #           expected stack, expected buffer, expected dependencies.
    cases = (
        ("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
         ("ROOT", "the", "cat"), ("sat",), ()),
        ("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
         ("ROOT", "cat",), ("sat",), (("cat", "the"),)),
        ("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
         ("ROOT", "run",), (), (("run", "fast"),)),
    )
    for case in cases:
        test_step(*case)
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse():
    """Check PartialParse.parse on one short sentence.

    Verifies both the produced dependencies and that the input sentence is
    left unchanged.
    Warning: these are not exhaustive
    """
    sentence = ["parse", "this", "sentence"]
    transitions = ["S", "S", "S", "LA", "RA", "RA"]

    parser = PartialParse(sentence)
    # Sorted so the comparison does not depend on the order of production.
    dependencies = tuple(sorted(parser.parse(transitions)))
    expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))

    assert dependencies == expected, \
        "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
    assert tuple(sentence) == ("parse", "this", "sentence"), \
        "parse test failed: the input sentence should not be modified"
    print("parse test passed!")
|
||
|
|
|
||
|
|
|
||
|
|
class DummyModel(object):
    """Dummy model for testing the minibatch_parse function

    Predicts a shift for every parse that still has words in its buffer. Once
    the buffer is empty it predicts only right arcs if the item directly above
    the bottom of the stack (the first word of the sentence) is "right", and
    only left arcs otherwise.
    """

    def predict(self, partial_parses):
        """Returns one transition ("S", "LA" or "RA") per given partial parse.

        @param partial_parses (list): Objects exposing `stack` and `buffer` lists.

        @return transitions (list of str): transitions[i] is the transition for partial_parses[i].
        @raise IndexError: If a parse has an empty buffer and fewer than two items on its stack.
        """
        # Compare by value (==): identity (`is`) against a string literal only
        # works when both strings happen to be the same interned object.
        return [("RA" if pp.stack[1] == "right" else "LA") if len(pp.buffer) == 0 else "S"
                for pp in partial_parses]
|
||
|
|
|
||
|
|
|
||
|
|
def test_dependencies(name, deps, ex_deps):
|
||
|
|
"""Tests the provided dependencies match the expected dependencies"""
|
||
|
|
deps = tuple(sorted(deps))
|
||
|
|
assert deps == ex_deps, \
|
||
|
|
"{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
|
||
|
|
|
||
|
|
|
||
|
|
def test_minibatch_parse():
    """Parse four short sentences with DummyModel in batches of two and check each result.

    Warning: these are not exhaustive
    """
    sentences = [["right", "arcs", "only"],
                 ["right", "arcs", "only", "again"],
                 ["left", "arcs", "only"],
                 ["left", "arcs", "only", "again"]]
    # expected_deps[i] is the sorted dependency tuple for sentences[i].
    expected_deps = [
        (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')),
        (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')),
        (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')),
        (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')),
    ]

    deps = minibatch_parse(sentences, DummyModel(), 2)

    # Index into `deps` so a too-short result fails loudly instead of being skipped.
    for index, expected in enumerate(expected_deps):
        test_dependencies("minibatch_parse", deps[index], expected)
    print("minibatch_parse test passed!")
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
    # Usage: exactly one keyword after the script name selects the tests to
    # run: 'part_c' (single-parse tests) or 'part_d' (minibatch test).
    args = sys.argv
    usage_error = "You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script"

    # Any other argument count is handled like an unknown keyword.
    keyword = args[1] if len(args) == 2 else None

    if keyword == "part_c":
        test_parse_step()
        test_parse()
    elif keyword == "part_d":
        test_minibatch_parse()
    else:
        raise Exception(usage_error)
|