CS50AI - Parser - Solutions
If you have doubts, refer to this. Don't copy it and submit it as your own answer.
Name: CS50's Introduction to Artificial Intelligence with Python
University: Harvard University
URL: https://cs50.harvard.edu/ai/2020/
My GitHub: https://github.com/Dinesh-Kumar-E
Automatic CS50 Submit Tool: https://github.com/Dinesh-Kumar-E/CS50-Submit
See: https://cs50.harvard.edu/college/2021/fall/syllabus/#academic-honesty
parser.py
import nltk
import sys
import string
# Lexical rules: each line maps a part-of-speech symbol (Adj, Adv, Conj, Det,
# N, P, V) to the literal lowercase words it may expand to.
TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and" | "until"
Det -> "a" | "an" | "his" | "my" | "the"
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""
# Phrase-structure rules, with S as the start symbol:
#   S    - one PART, or two PARTs joined by a conjunction
#   PART - a noun phrase plus verb phrase (optionally with an adverb between
#          them), or a bare verb phrase
#   NP   - a noun, optionally preceded by a run of determiners/adjectives (NA)
#   VP   - a verb, optionally followed by a supplement (SUPP)
#   SUPP - any sequence of noun phrases, prepositions and adverbs
# NOTE(review): `NA -> NA NA` and `SUPP -> SUPP SUPP | SUPP SUPP SUPP` are
# recursive on both sides, so one sentence can presumably yield several
# structurally different parse trees - confirm this is acceptable.
NONTERMINALS = """
S -> PART | PART Conj PART
PART -> NP VP | NP Adv VP | VP
NP -> N | NA N
NA -> Det | Adj | NA NA
VP -> V | V SUPP
SUPP -> NP | P | Adv | SUPP SUPP | SUPP SUPP SUPP
"""
# Build one context-free grammar from both rule sets and a chart parser for it.
# NONTERMINALS comes first in the concatenation, so the S rule is the first
# production in the combined grammar text.
grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
parser = nltk.ChartParser(grammar)
def main():
    """
    Read a sentence, parse it with the module-level `parser`, and print
    every resulting parse tree followed by its noun phrase chunks.

    The sentence is read from the file named by the single command-line
    argument when exactly one is given; otherwise the user is prompted.
    """
    # Exactly one argument means "read from this file"; anything else prompts.
    if len(sys.argv) != 2:
        text = input("Sentence: ")
    else:
        with open(sys.argv[1]) as source:
            text = source.read()

    # Turn the raw text into the list of word tokens the parser expects.
    words = preprocess(text)

    try:
        parses = list(parser.parse(words))
    except ValueError as error:
        # Presumably raised when the input contains a word the grammar does
        # not cover; report it and stop rather than crash.
        print(error)
        return

    if len(parses) == 0:
        print("Could not parse sentence.")
        return

    # Show every parse, each followed by the noun phrase chunks found in it.
    for parse in parses:
        parse.pretty_print()
        print("Noun Phrase Chunks")
        for chunk in np_chunk(parse):
            print(" ".join(chunk.flatten()))
def preprocess(sentence):
    """
    Convert `sentence` to a list of its words.

    The sentence is split with NLTK's word tokenizer, every token is
    lowercased, and any token that does not contain at least one
    alphabetic character (for example "." or "28") is removed.

    Returns a list of lowercase strings in their original order.
    """
    tokens = nltk.tokenize.word_tokenize(sentence)
    # Keep a token when ANY of its characters is alphabetic. Calling
    # `str.isalpha()` on the whole token would also discard tokens such as
    # "n't" or "well-known", which do contain letters.
    return [
        token.lower()
        for token in tokens
        if any(char.isalpha() for char in token)
    ]
def np_chunk(tree):
    """
    Return a list of all noun phrase chunks in the sentence tree.

    A noun phrase chunk is defined as any subtree of the sentence
    whose label is "NP" that does not itself contain any other
    noun phrases as subtrees.

    `tree` must provide `subtrees()` (yielding itself and every descendant
    subtree) and `label()`. Chunks are returned in the order in which
    `tree.subtrees()` yields them.
    """
    chunks = []
    for subtree in tree.subtrees():
        if subtree.label() != "NP":
            continue
        # `subtrees()` also yields the subtree itself, so exclude it by
        # identity and look only for NP-labelled descendants.
        has_nested_np = any(
            inner is not subtree and inner.label() == "NP"
            for inner in subtree.subtrees()
        )
        if not has_nested_np:
            chunks.append(subtree)
    return chunks
# Run the command-line entry point only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Comments
Post a Comment