import sys
import csv
import unicodedata
import re
import random

def get_poem_data(pf="kaggle_poem_dataset.csv"):
	"""Loads the poem dataset and normalizes the text of every poem.

	Args:
		pf: Path of the CSV file to read. The file needs a header row that
			includes a 'Content' column. Defaults to
			"kaggle_poem_dataset.csv", resolved against the current working
			directory.

	Returns:
		A list with one mapping per CSV row, keyed by the header names, whose
		'Content' value has been NFKC-normalized.

	Raises:
		OSError: If the file cannot be opened.
		KeyError: If a row has no 'Content' entry.
	"""
	# newline='' hands line-ending handling to the csv module; without it,
	# line breaks embedded in quoted fields are rewritten before csv sees them.
	with open(pf, 'r', encoding='utf-8', newline='') as csvf:
		poems = list(csv.DictReader(csvf))
	for poem in poems:
		# Normalize weird characters: NFKC folds compatibility forms such as
		# ligatures and non-breaking spaces into their plain equivalents.
		poem['Content'] = unicodedata.normalize('NFKC', poem['Content'])
	return poems

def greet():
	"""Introduces Odette on stdout, asks for the user's name and returns the reply."""
	print("Hello! I am Odette, a poetry-recommending chatbot.")
	# input() shows the prompt and returns the typed line without its newline.
	return input("What is your name? ")

def theme():
	"""Asks the user what they like and returns the topics found in the reply.

	Only likes are asked for. The reply is passed through remove_verbs() and
	the result is split into individual items by find_items().

	Returns:
		The list of items produced by find_items().
	"""
	print("To help me pick a poem, tell me a little about yourself.")
	answer = input("What are some things you like? ")
	return find_items(remove_verbs(answer))

def remove_verbs(s):
	"""Removes preference verbs ("like", "love", "adore") from a response.

	Everything up to and including the first preference verb is dropped, so
	"I like dogs" becomes " dogs". Any further preference verbs in the rest of
	the text are deleted in place, leaving the words around them untouched.
	Verbs are matched as whole words and regardless of case, so words that
	merely contain one ("gloves", "unlikely") are left alone.

	Args:
		s: The user's response.

	Returns:
		The response without its preference verbs, or s unchanged when it
		contains none. Surrounding whitespace is not trimmed.
	"""
	verb = re.compile(r'\b(?:like|love|adore)\b', re.IGNORECASE)
	first = verb.search(s)
	if first is None:
		return s
	# Cut the lead-in ("I really like ...") once, then only delete later
	# verbs so items listed before a second verb are not thrown away.
	return verb.sub('', s[first.end():])

def find_items(s):
	"""Finds list items in a user response string.

	The string is split on commas, semicolons, full stops and the standalone
	words "and" / "or" (any case). The conjunctions only count as separators
	when they are whole words, so "horses" or "candy" stay intact.

	Args:
		s: The user's response.

	Returns:
		The non-empty items in their original order, each stripped of
		surrounding whitespace. Empty when s holds no items.
	"""
	pieces = re.split(r',|;|\.|\b(?:and|or)\b', s, flags=re.IGNORECASE)
	stripped = (piece.strip() for piece in pieces)
	return [item for item in stripped if item]

def find_thematic_poems(topic,poems):
	"""Retrieves the poems that mention a theme, sorted by theme mention frequency.

	A mention is a case-insensitive occurrence of topic that is not part of a
	longer word, so mentions next to punctuation or at the start or end of a
	line count, while "gloves" does not count for "love".

	Args:
		topic: Word or phrase to look for.
		poems: Iterable of mappings that each have a 'Content' string.

	Returns:
		A new list holding the poems that mention topic at least once, most
		mentions first; poems with equal counts keep their input order. Empty
		when topic is blank or nothing matches.
	"""
	if not topic.strip():
		return []
	# Lookarounds rather than \b so topics that start or end with a non-word
	# character are still delimited correctly.
	pattern = re.compile(r'(?<!\w)' + re.escape(topic) + r'(?!\w)', re.IGNORECASE)
	counted = []
	for poem in poems:
		mentions = len(pattern.findall(poem['Content']))
		if mentions:
			counted.append((mentions, poem))
	# list.sort is stable, also with reverse=True, so ties keep input order.
	counted.sort(key=lambda pair: pair[0], reverse=True)
	return [poem for _, poem in counted]

def main():
	"""Runs one chatbot session and prints a recommended poem.

	Loads the dataset, greets the user, asks for their likes, then narrows
	the candidate poems one liked topic at a time. A topic that matches none
	of the current candidates is skipped. Finally the first remaining
	candidate is printed, or an apology if the dataset was empty.
	"""
	poems = get_poem_data()
	# The name greet() returns is not used yet; the call is kept for its prompts.
	greet()
	likes = theme()
	# Randomize which liked topic gets applied first.
	random.shuffle(likes)
	new_poems = poems
	# Apply theme preferences
	while len(new_poems) > 1 and likes:
		topic = likes.pop()
		theme_poems = find_thematic_poems(topic, new_poems)
		# Any match narrows the candidates, including a single one: a topic
		# that pins down exactly one poem is the best possible outcome.
		if theme_poems:
			new_poems = theme_poems
			print(f"I'm searching for poems about {topic}.")
	if not new_poems:
		print("Sorry, I couldn't find any poems to recommend.")
		return
	# Print first poem
	print(new_poems[0])


# Start a chat session only when run as a script, so importing this module
# (e.g. to reuse its helpers) does not prompt for input.
if __name__ == "__main__":
	main()