"""Odette: a small command-line chatbot that recommends a poem.

The user is asked what they like; the answer is broken into topics and the
poem dataset is narrowed down to poems that mention those topics.
"""

import csv
import random
import re
import sys
import unicodedata

# Preference verbs stripped from the user's answer, in the order they are tried.
_PREFERENCE_VERBS = ("like", "love", "adore")

# One whole-word, case-insensitive pattern per verb.  The optional [sd] also
# catches simple inflections ("likes", "loved") so no stray letter is left over.
_VERB_PATTERNS = [
    re.compile(rf"\b{verb}[sd]?\b", re.IGNORECASE) for verb in _PREFERENCE_VERBS
]

# List separators: comma, semicolon, full stop, and the *words* "and" / "or".
# The word boundaries keep "sandals" or "horses" from being cut in pieces.
_ITEM_SEPARATOR = re.compile(r",|;|\.|\b(?:and|or)\b", re.IGNORECASE)


def get_poem_data(pf="kaggle_poem_dataset.csv"):
    """Load the poem dataset and normalise each poem's text.

    Args:
        pf: Path of the CSV file to read.  It must have a header row that
            includes a ``Content`` column.

    Returns:
        A list with one dict per CSV row.  The ``Content`` value of every row
        is NFKC-normalised; a missing value becomes the empty string.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the CSV has no ``Content`` column.
    """
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(pf, "r", encoding="utf-8", newline="") as csvf:
        poems = list(csv.DictReader(csvf))
    for poem in poems:
        # Normalize weird characters (ligatures, full-width forms, ...).
        # Short rows yield None for missing columns; treat that as empty text.
        poem["Content"] = unicodedata.normalize("NFKC", poem["Content"] or "")
    return poems


def greet():
    """Greet the user and return the name they type in."""
    print("Hello! I am Odette, a poetry-recommending chatbot.")
    return input("What is your name? ")


def theme():
    """Ask the user what they like and return the topics found in the answer.

    Returns:
        A list of topic strings (possibly empty).
    """
    print("To help me pick a poem, tell me a little about yourself.")
    pos_text = input("What are some things you like? ")
    return find_items(remove_verbs(pos_text))


def remove_verbs(s):
    """Strip a leading preference phrase such as "I like" from ``s``.

    Each verb in ``_PREFERENCE_VERBS`` is tried in turn.  When the verb occurs
    as a whole word (any letter case), ``s`` is replaced by the text that
    follows its first occurrence, up to the next occurrence of the same verb.

    Args:
        s: The raw user answer.

    Returns:
        The remaining text; ``s`` unchanged when no verb is found.
    """
    for pattern in _VERB_PATTERNS:
        parts = pattern.split(s)
        if len(parts) > 1:
            s = parts[1]
    return s


def find_items(s):
    """Split a user response into its list items.

    Items are separated by commas, semicolons, full stops, or the words
    "and" / "or".

    Args:
        s: Text such as ``"cats, dogs and birds"``.

    Returns:
        The non-empty items with surrounding whitespace removed.
    """
    bits = (piece.strip() for piece in _ITEM_SEPARATOR.split(s))
    return [bit for bit in bits if bit]


def find_thematic_poems(topic, poems):
    """Return the poems mentioning ``topic``, most frequent mentions first.

    A mention is a case-insensitive, whole-word occurrence of ``topic`` in the
    poem's ``Content``; it may sit next to punctuation, a line break, or the
    start/end of the text.

    Args:
        topic: The word or phrase to look for.
        poems: An iterable of dicts, each with a ``Content`` string.

    Returns:
        A list of the matching poem dicts sorted by descending mention count.
        Poems with equal counts keep their original relative order.  A blank
        topic matches nothing.
    """
    topic = topic.strip()
    if not topic:
        return []
    # Lookarounds instead of \b so topics that start or end with a non-word
    # character are still delimited correctly.
    pattern = re.compile(
        r"(?<!\w)" + re.escape(topic) + r"(?!\w)", re.IGNORECASE
    )
    counted = []
    for poem in poems:
        mentions = len(pattern.findall(poem["Content"]))
        if mentions:
            counted.append((mentions, poem))
    # sorted() is stable, also with reverse=True, so ties keep dataset order.
    counted.sort(key=lambda pair: pair[0], reverse=True)
    return [poem for _, poem in counted]


def main():
    """Run the chatbot: load poems, ask for preferences, print one poem."""
    poems = get_poem_data()
    greet()  # the returned name is not used anywhere yet
    likes = theme()
    random.shuffle(likes)
    new_poems = poems
    # Apply theme preferences: keep narrowing while more than one candidate
    # remains and there are still topics to try.
    while len(new_poems) > 1 and likes:
        topic = likes.pop()
        theme_poems = find_thematic_poems(topic, new_poems)
        # A single matching poem is a perfectly good result; only a topic
        # that matches nothing is ignored.
        if theme_poems:
            new_poems = theme_poems
        print(f"I'm searching for poems about {topic}.")
    if not new_poems:
        # Empty dataset: nothing to index into.
        print("I couldn't find any poems to recommend.")
        return
    # Print first poem (the whole dataset row).
    print(new_poems[0])


if __name__ == "__main__":
    main()