# Authors: Sidney Steward and Francys Scott
# Date: May 15, 2013
# Purpose: Wellesley Events final project
# File was modified by Eni Mustafaraj on Feb 20, 2015.

from datetime import datetime
try:
    import urllib2
except ImportError:
    # Python 3: urlopen lives in urllib.request; keep the old name so the
    # rest of the file is unchanged.
    import urllib.request as urllib2
from xml.dom.minidom import parseString
import json


def get_text(dom, nodeName):
    """Helper function to read the text from a given node.

    Returns the stripped node value of the first child of the first
    <nodeName> element found under `dom`. If there is no such element, or
    it has no text, a warning is printed and "" is returned.
    """
    try:
        txt = dom.getElementsByTagName(nodeName)[0].firstChild.nodeValue
    except (IndexError, AttributeError):
        # IndexError: no element with that tag name.
        # AttributeError: the element is empty (firstChild is None).
        txt = None
    if txt is None:
        # nodeValue is also None when the first child is not a text node.
        print("Couldn't find text for node:  %s" % nodeName)
        return ""
    return txt.strip()


# Maps each category name to the list of event_type values from the feed
# that are filed under it.
eventTypeDict = {
    "Arts": ['Event-Concert', 'Event-Exhibit', 'Event-Film',
             'Event-Performance'],
    "Sports": ['P E R A Camp', 'P E R A Class', 'P E R A Contest',
               'P E R A Meeting', 'P E R A Open Recreation',
               'P E R A Practice', 'P E R A Recreation Program',
               'P E R A Scrimmage', 'P E R A Special Event',
               'Student: Club Sport', 'Student: Club Sport Contest',
               'Student: Club Sport Practice'],
    "CWS": ['Event-Workshop', 'Event-Training', 'Event-Webinar'],
    "LPS": ['Event-Interviews', 'Event-Lecture', 'Event-Panel',
            'Event-Seminar', 'Event-Symposium', 'Registrar-Lecture'],
    "Religious": ['Event-Religious/Spiritual',
                  'Student: Religious/Spiritual'],
    "Student": ['Student: Ceremony', 'Student: Class (Non-credit)',
                'Student: Party', 'Student: Performance',
                'Student: Reception', 'Student: Recital',
                'Student: Rehearsal', 'Student: Seminar',
                'Student: Study Break', 'Student: Symposium',
                'Student: Trip', 'Student: Community Service',
                'Student: Concert', 'Student: Conference',
                'Student: Dinner', 'Student: Exhibit',
                'Student: Film Showing', 'Student: Lecture',
                'Student: Lunch/Brunch', 'Student: Meeting',
                'Student: Overnight', 'Student: Panel', 'Student: Vendor',
                'Student: Wellness', 'Student: Workshop'],
    "Miscellaneous": ['Event-Announcement', 'Event-Breakfast',
                      'Event-Ceremony', 'Event-Community Service',
                      'Event-Dinner', 'Event-Luncheon', 'Event-Meeting',
                      'Event-Party', 'Event-Reception', 'Event-Recital',
                      'Event-Rehearsal', 'Event-Study Break', 'Event-Tour',
                      'Event-Wellness', 'Registrar-Class Presentation',
                      'Registrar-Exam', 'Registrar-Film',
                      'Registrar-Honors Oral', 'Registrar-Meeting',
                      'Registrar-Review Session', 'Registrar-Tutoring',
                      'Vendor'],
}


def _find_category(eventType):
    """Return the eventTypeDict key whose list contains eventType, else ""."""
    for category, eventTypes in eventTypeDict.items():
        if eventType in eventTypes:
            return category
    return ""


def read_data(url):
    """Reads data from RSS and extracts a few desired fields.

    Fetches `url`, parses the response as XML and builds one record per
    <item> element.

    Returns a dict keyed by event id (the text after the first '=' in the
    item's link, up to the next '='). Each value is a dict with the keys
    'id', 'title', 'url', 'starttime', 'endtime', 'organizer',
    'description', 'eventType', 'category' and 'place'. 'category' is ""
    when the event type is not listed in eventTypeDict. If two items share
    an id, the later one replaces the earlier one.

    Raises IndexError if an item's link contains no '='. Errors from
    opening the URL or parsing the XML are not caught here.
    """
    page = urllib2.urlopen(url)
    try:
        data = page.read()
    finally:
        page.close()

    xmldoc = parseString(data)
    eventsDct = {}
    for item in xmldoc.getElementsByTagName('item'):  # every event is an item
        link = get_text(item, 'link')
        event_id = link.split('=')[1]  # get event id
        eventType = get_text(item, 'event_type')  # already stripped
        eventsDct[event_id] = {
            'id': event_id,
            'title': get_text(item, 'title'),
            'url': link,
            'starttime': get_text(item, 'start_time'),
            'endtime': get_text(item, 'end_time'),
            'organizer': get_text(item, 'requestor'),
            'description': get_text(item, 'description'),
            'eventType': eventType,
            'category': _find_category(eventType),
            'place': get_text(item, 'location'),
        }
    return eventsDct


# Main method.
# Execute the code to read the feed and store the events
URL = "https://events.wellesley.edu/cs-rss.php"
events = read_data(URL)
#create_events()
print(len(events))
print(list(events.items())[:4])
# Use a context manager so the output file is flushed and closed.
with open("WCevents.json", 'w') as outfile:
    json.dump(events, outfile)