Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
shauryagoyall authored Aug 11, 2022
1 parent 5d68224 commit d1bb657
Show file tree
Hide file tree
Showing 11 changed files with 71,819 additions and 0 deletions.
Binary file added Homework/3 Reconstruct/astar_search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
397 changes: 397 additions & 0 deletions Homework/3 Reconstruct/grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,397 @@
#!/usr/bin/python3

import graderUtil
import util
import random
import sys
import wordsegUtil
import copy

grader = graderUtil.Grader()
submission = grader.load('submission')

QUERIES_SEG = [
'ThestaffofficerandPrinceAndrewmountedtheirhorsesandrodeon',
'hellothere officerandshort erprince',
'howdythere',
'The staff officer and Prince Andrew mounted their horses and rode on.',
'whatsup',
'duduandtheprince',
'duduandtheking',
'withoutthecourtjester',
'lightbulbneedschange',
'imagineallthepeople',
'thisisnotmybeautifulhouse',
]

QUERIES_INS = [
'strng',
'pls',
'hll thr',
'whats up',
'dudu and the prince',
'frog and the king',
'ran with the queen and swam with jack',
'light bulbs need change',
'ffcr nd prnc ndrw',
'ffcr nd shrt prnc',
'ntrntnl',
'smthng',
'btfl',
]

QUERIES_BOTH = [
'stff',
'hllthr',
'thffcrndprncndrw',
'ThstffffcrndPrncndrwmntdthrhrssndrdn',
'whatsup',
'ipovercarrierpigeon',
'aeronauticalengineering',
'themanwiththegoldeneyeball',
'lightbulbsneedchange',
'internationalplease',
'comevisitnaples',
'somethingintheway',
'itselementarymydearwatson',
'itselementarymyqueen',
'themanandthewoman',
'nghlrdy',
'jointmodelingworks',
'jointmodelingworkssometimes',
'jointmodelingsometimesworks',
'rtfclntllgnc',
]

CORPUS = 'leo-will.txt'

_realUnigramCost, _realBigramCost, _possibleFills = None, None, None

def getRealCosts():
global _realUnigramCost, _realBigramCost, _possibleFills

if _realUnigramCost is None:
sys.stdout.write('Training language cost functions [corpus: %s]... ' % CORPUS)
sys.stdout.flush()

_realUnigramCost, _realBigramCost = wordsegUtil.makeLanguageModels(CORPUS)
_possibleFills = wordsegUtil.makeInverseRemovalDictionary(CORPUS, 'aeiou')

print('Done!')
print('')

return _realUnigramCost, _realBigramCost, _possibleFills


def add_parts_1(grader, submission):
grader.add_manual_part('1a', 2, description='example to justify the greedy algorithm is suboptimal in word segmentation')

if grader.selectedPartName in ['1b-2-basic', '1b-3-hidden', '1b-4-hidden', None]: # avoid timeouts
unigramCost, _, _ = getRealCosts()

def t_1b_1():
def unigramCost(x):
if x in ['and', 'two', 'three', 'word', 'words']:
return 1.0
else:
return 1000.0

grader.require_is_equal('', submission.segmentWords('', unigramCost))
grader.require_is_equal('word', submission.segmentWords('word', unigramCost))
grader.require_is_equal('two words', submission.segmentWords('twowords', unigramCost))
grader.require_is_equal('and three words', submission.segmentWords('andthreewords', unigramCost))

grader.add_basic_part('1b-1-basic', t_1b_1, max_points=1, max_seconds=2, description='simple test case using hand-picked unigram costs')

def t_1b_2():
grader.require_is_equal('word', submission.segmentWords('word', unigramCost))
grader.require_is_equal('two words', submission.segmentWords('twowords', unigramCost))
grader.require_is_equal('and three words', submission.segmentWords('andthreewords', unigramCost))

grader.add_basic_part('1b-2-basic', t_1b_2, max_points=1, max_seconds=2, description='simple test case using unigram cost from the corpus')

def t_1b_3():
# Word seen in corpus
solution1 = submission.segmentWords('pizza', unigramCost)

# Even long unseen words are preferred to their arbitrary segmentations
solution2 = submission.segmentWords('qqqqq', unigramCost)
solution3 = submission.segmentWords('z' * 100, unigramCost)

# But 'a' is a word
solution4 = submission.segmentWords('aa', unigramCost)

# With an apparent crossing point at length 6->7
solution5 = submission.segmentWords('aaaaaa', unigramCost)
solution6 = submission.segmentWords('aaaaaaa', unigramCost)


grader.add_hidden_part('1b-3-hidden', t_1b_3, max_points=3, max_seconds=3, description='simple hidden test case')

def t_1b_4():
for query in QUERIES_SEG:
query = wordsegUtil.cleanLine(query)
parts = wordsegUtil.words(query)
pred = [submission.segmentWords(part, unigramCost) for part in parts]

grader.add_hidden_part('1b-4-hidden', t_1b_4, max_points=5, max_seconds=3, description='hidden test case for all queries in QUERIES_SEG')


def add_parts_2(grader, submission):
grader.add_manual_part('2a', 2, description='example to justify the greedy algorithm is suboptimal in vowel insertion')

if grader.selectedPartName in ['2b-2-hidden', '2b-4-hidden', None]: # avoid timeouts
_, bigramCost, possibleFills = getRealCosts()

def t_2b_1():
def bigramCost(a, b):
corpus = [wordsegUtil.SENTENCE_BEGIN] + 'beam me up scotty'.split()
if (a, b) in list(zip(corpus, corpus[1:])):
return 1.0
else:
return 1000.0

def possibleFills(x):
fills = {
'bm' : set(['beam', 'bam', 'boom']),
'm' : set(['me', 'ma']),
'p' : set(['up', 'oop', 'pa', 'epe']),
'sctty': set(['scotty']),
}
return fills.get(x, set())

grader.require_is_equal(
'',
submission.insertVowels([], bigramCost, possibleFills)
)
grader.require_is_equal( # No fills
'zz$z$zz',
submission.insertVowels(['zz$z$zz'], bigramCost, possibleFills)
)
grader.require_is_equal(
'beam',
submission.insertVowels(['bm'], bigramCost, possibleFills)
)
grader.require_is_equal(
'me up',
submission.insertVowels(['m', 'p'], bigramCost, possibleFills)
)
grader.require_is_equal(
'beam me up scotty',
submission.insertVowels('bm m p sctty'.split(), bigramCost, possibleFills)
)

grader.add_basic_part('2b-1-basic', t_2b_1, max_points=1, max_seconds=2, description='simple test case')

def t_2b_2():
solution1 = submission.insertVowels([], bigramCost, possibleFills)
# No fills
solution2 = submission.insertVowels(['zz$z$zz'], bigramCost, possibleFills)
solution3 = submission.insertVowels([''], bigramCost, possibleFills)
solution4 = submission.insertVowels('wld lk t hv mr lttrs'.split(), bigramCost, possibleFills)
solution5 = submission.insertVowels('ngh lrdy'.split(), bigramCost, possibleFills)


grader.add_hidden_part('2b-2-hidden', t_2b_2, max_points=3, max_seconds=2, description='simple hidden test case')

def t_2b_3():
SB = wordsegUtil.SENTENCE_BEGIN

# Check for correct use of SENTENCE_BEGIN
def bigramCost(a, b):
if (a, b) == (SB, 'cat'):
return 5.0
elif a != SB and b == 'dog':
return 1.0
else:
return 1000.0

solution1 = submission.insertVowels(['x'], bigramCost, lambda x: set(['cat', 'dog']))

# Check for non-greediness

def bigramCost(a, b):
# Dog over log -- a test poem by rf
costs = {
(SB, 'cat'): 1.0, # Always start with cat

('cat', 'log'): 1.0, # Locally prefer log
('cat', 'dog'): 2.0, # rather than dog

('log', 'mouse'): 3.0, # But dog would have been
('dog', 'mouse'): 1.0, # better in retrospect
}
return costs.get((a, b), 1000.0)

def fills(x):
return {
'x1': set(['cat', 'dog']),
'x2': set(['log', 'dog', 'frog']),
'x3': set(['mouse', 'house', 'cat'])
}[x]

solution2 = submission.insertVowels('x1 x2 x3'.split(), bigramCost, fills)

# Check for non-trivial long-range dependencies
def bigramCost(a, b):
# Dogs over logs -- another test poem by rf
costs = {
(SB, 'cat'): 1.0, # Always start with cat

('cat', 'log1'): 1.0, # Locally prefer log
('cat', 'dog1'): 2.0, # Rather than dog

('log20', 'mouse'): 1.0, # And this might even
('dog20', 'mouse'): 1.0, # seem to be okay
}
for i in range(1, 20): # But along the way
# Dog's cost will decay
costs[('log' + str(i), 'log' + str(i+1))] = 0.25
costs[('dog' + str(i), 'dog' + str(i+1))] = 1.0 / float(i)
# Hooray
return costs.get((a, b), 1000.0)

def fills(x):
f = {
'x0': set(['cat', 'dog']),
'x21': set(['mouse', 'house', 'cat']),
}
for i in range(1, 21):
f['x' + str(i)] = set(['log' + str(i), 'dog' + str(i), 'frog'])
return f[x]

solution3 = submission.insertVowels(['x' + str(i) for i in range(0, 22)], bigramCost, fills)


grader.add_hidden_part('2b-3-hidden', t_2b_3, max_points=3, max_seconds=3, description='simple hidden test case')

def t_2b_4():
for query in QUERIES_INS:
query = wordsegUtil.cleanLine(query)
ws = [wordsegUtil.removeAll(w, 'aeiou') for w in wordsegUtil.words(query)]
pred = submission.insertVowels(copy.deepcopy(ws), bigramCost, possibleFills)

grader.add_hidden_part('2b-4-hidden', t_2b_4, max_points=3, max_seconds=3, description='hidden test case for all queries in QUERIES_INS')


def add_parts_3(grader, submission):
grader.add_manual_part('3a', 4, description='formalize the search problem')

if grader.selectedPartName in ['3b-2-basic', '3b-3-hidden', '3b-5-hidden', None]: # avoid timeouts
unigramCost, bigramCost, possibleFills = getRealCosts()

def t_3b_1():
def bigramCost(a, b):
if b in ['and', 'two', 'three', 'word', 'words']:
return 1.0
else:
return 1000.0

fills_ = {
'nd': set(['and']),
'tw': set(['two']),
'thr': set(['three']),
'wrd': set(['word']),
'wrds': set(['words']),
}
fills = lambda x: fills_.get(x, set())

grader.require_is_equal('', submission.segmentAndInsert('', bigramCost, fills))
grader.require_is_equal('word', submission.segmentAndInsert('wrd', bigramCost, fills))
grader.require_is_equal('two words', submission.segmentAndInsert('twwrds', bigramCost, fills))
grader.require_is_equal('and three words', submission.segmentAndInsert('ndthrwrds', bigramCost, fills))

grader.add_basic_part('3b-1-basic', t_3b_1, max_points=1, max_seconds=2, description='simple test case with hand-picked bigram costs and possible fills')

def t_3b_2():
bigramCost = lambda a, b: unigramCost(b)

fills_ = {
'nd': set(['and']),
'tw': set(['two']),
'thr': set(['three']),
'wrd': set(['word']),
'wrds': set(['words']),
}
fills = lambda x: fills_.get(x, set())

grader.require_is_equal(
'word',
submission.segmentAndInsert('wrd', bigramCost, fills))
grader.require_is_equal(
'two words',
submission.segmentAndInsert('twwrds', bigramCost, fills))
grader.require_is_equal(
'and three words',
submission.segmentAndInsert('ndthrwrds', bigramCost, fills))

grader.add_basic_part('3b-2-basic', t_3b_2, max_points=1, max_seconds=2, description='simple test case with unigram costs as bigram costs')

def t_3b_3():
bigramCost = lambda a, b: unigramCost(b)
fills_ = {
'nd': set(['and']),
'tw': set(['two']),
'thr': set(['three']),
'wrd': set(['word']),
'wrds': set(['words']),
# Hah! Hit them with two better words
'th': set(['the']),
'rwrds': set(['rewards']),
}
fills = lambda x: fills_.get(x, set())

solution1 = submission.segmentAndInsert('wrd', bigramCost, fills)
solution2 = submission.segmentAndInsert('twwrds', bigramCost, fills)
# Waddaya know
solution3 = submission.segmentAndInsert('ndthrwrds', bigramCost, fills)


grader.add_hidden_part('3b-3-hidden', t_3b_3, max_points=5, max_seconds=3, description='hidden test case with unigram costs as bigram costs and additional possible fills')

def t_3b_4():
def bigramCost(a, b):
corpus = [wordsegUtil.SENTENCE_BEGIN] + 'beam me up scotty'.split()
if (a, b) in list(zip(corpus, corpus[1:])):
return 1.0
else:
return 1000.0

def possibleFills(x):
fills = {
'bm' : set(['beam', 'bam', 'boom']),
'm' : set(['me', 'ma']),
'p' : set(['up', 'oop', 'pa', 'epe']),
'sctty': set(['scotty']),
'z' : set(['ze']),
}
return fills.get(x, set())

# Ensure no non-word makes it through
solution1 = submission.segmentAndInsert('zzzzz', bigramCost, possibleFills)
solution2 = submission.segmentAndInsert('bm', bigramCost, possibleFills)
solution3 = submission.segmentAndInsert('mp', bigramCost, possibleFills)
solution4 = submission.segmentAndInsert('bmmpsctty', bigramCost, possibleFills)


grader.add_hidden_part('3b-4-hidden', t_3b_4, max_points=5, max_seconds=3, description='hidden test case with hand-picked bigram costs and possible fills')

def t_3b_5():
smoothCost = wordsegUtil.smoothUnigramAndBigram(unigramCost, bigramCost, 0.2)
for query in QUERIES_BOTH:
query = wordsegUtil.cleanLine(query)
parts = [wordsegUtil.removeAll(w, 'aeiou') for w in wordsegUtil.words(query)]
pred = [submission.segmentAndInsert(part, smoothCost, possibleFills) for part in parts]

grader.add_hidden_part('3b-5-hidden', t_3b_5, max_points=6, max_seconds=3, description='hidden test case for all queries in QUERIES_BOTH with bigram costs and possible fills from the corpus')

def add_parts_4(grader, submission):
grader.add_manual_part('4a', 2, description='example analysis')
grader.add_manual_part('4b', 2, description='transparency statement')
grader.add_manual_part('4c', 2, description='improved algorithm')

add_parts_1(grader, submission)
add_parts_2(grader, submission)
add_parts_3(grader, submission)
add_parts_4(grader, submission)
grader.grade()
Loading

0 comments on commit d1bb657

Please sign in to comment.