# Filename: wellesley_json_data.py
# Date: March 12, 2015
# Author: Eni Mustafaraj
# Purpose: Show how to use Python with JSON data for the Wellesley Courses data

"""
How to use this file.

1) Download the folder wellesleyJSON from the AM5 webpage and unzip it.
2) Create a folder, let's say AM5data, and move the wellesleyJSON folder
   into it.
3) Move this file into AM5data, so that this file and the wellesleyJSON
   folder are siblings.
4) If you know how to use Canopy for Python, open the file and run it.
5) If not, check to see whether you have Python installed, by opening a
   terminal window and typing python. If not, you can log in to your CS
   account with ssh and run this code there. In that case, you'll need to
   move the JSON folder there too, using FETCH.
6) Once you have Python and are in the folder AM5data (use the command cd
   to go there) you can type "python wellesley_json_data.py" (without the
   quotes) and follow the instructions.

Your JSON files will show up within the AM5data folder.
"""

import os
import json
import re

PATH = "wellesleyJSON/"

# The lab/discussion sections have one of the tokens 'C', 'D', 'L', 'LA' or
# 'LB' in the second part of the course code, e.g. "CS 111 - L01". Since 'LA'
# and 'LB' both contain 'L', searching for a single C, D or L is sufficient.
_LAB_TOKEN_RE = re.compile(r"[CDL]")


def is_lab(crsCode):
    """Find whether a course code indicates a lab (or discussion) section.

    crsCode is a full course code such as "CS 111 - L01". Only the part
    between the first and the second hyphen (the section part) is inspected.

    Returns True if the section part contains 'C', 'D' or 'L', and False
    otherwise. A code without any hyphen has no section part and is
    therefore reported as not being a lab.
    """
    parts = crsCode.split('-')
    if len(parts) < 2:
        # No section part at all: nothing marks this code as a lab.
        return False
    return _LAB_TOKEN_RE.search(parts[1]) is not None


# A function that, given a course code such as "CS 110", finds enrollments
# over all semesters. We cannot use the CRN, because CRNs differ from
# semester to semester. The function also filters out lab sections and
# combines multiple sections.
# It returns an empty dictionary if it doesn't find a course.
def find_enrollments(shortCode, allCoursesLst):
    """Combine the enrollment figures of all non-lab sections of a course.

    shortCode is a course code such as "CS 110"; a course matches when
    shortCode occurs anywhere inside its "Course" field.
    allCoursesLst is a list of course dictionaries, each providing the keys
    "Course", "Current Enrollment" and "Seats Available". The capacity is
    read as the number after the last "/" in "Seats Available".

    Returns a dictionary with the keys "sections", "enrolled" and
    "capacity", or an empty dictionary if no non-lab section was found.
    """
    enrollDct = {"sections": 0, "enrolled": 0, "capacity": 0}
    for course in allCoursesLst:
        officialCode = course["Course"]
        if shortCode not in officialCode:
            continue  # not our course, move to the next one
        if is_lab(officialCode):
            continue  # lab/discussion sections are not counted
        enrollDct["sections"] += 1
        enrollDct["enrolled"] += int(course["Current Enrollment"])
        capacity = course["Seats Available"].split("/")[-1].strip()
        enrollDct["capacity"] += int(capacity)

    # Every counted course increments "sections", so a zero there means
    # that nothing was found.
    if enrollDct["sections"] == 0:
        return {}
    return enrollDct


def main(courseName):
    """Function that is called with the user entry.

    Reads every .json file in PATH, collects the enrollment data of
    courseName for each semester (the semester code is the file name up to
    the first dot), writes the results to "enrollments_<course>.json" in the
    current directory, and prints a short summary.
    """
    # grab files and make sure they are all JSON files; sorting keeps the
    # processing order the same from run to run
    files = sorted(f for f in os.listdir(PATH) if f.endswith('.json'))

    enrollments = {}  # store here results from all semesters
    for f in files:
        # 'with' closes the file as soon as it has been parsed
        with open(os.path.join(PATH, f)) as inFile:
            data = json.load(inFile)
        semester = f.split(".")[0]  # get semester code from filename
        result = find_enrollments(courseName, data["courses"])
        if result:
            enrollments[semester] = result

    # store results in a JSON file; 'with' guarantees the data is flushed
    # and the file is closed before we report success
    fName = "enrollments_%s.json" % courseName.replace(" ", "_")
    with open(fName, 'w') as outFile:
        json.dump(enrollments, outFile)

    # display message to user; printing one pre-formatted string produces
    # the same output under Python 2 and Python 3
    print("We found enrollment data for:  %d  semesters." % len(enrollments))
    print("Data was stored in the file:  %s" % fName)


message = (
    "Enter a course code, e.g., CS 110 or CHEM 105. "
    "Don't forget to use uppercase and have a space between "
    "subject code and number: "
)

if __name__ == "__main__":
    try:
        readInput = raw_input  # Python 2
    except NameError:
        readInput = input  # Python 3
    # strip stray whitespace so that "CS 110 " still matches "CS 110"
    courseName = readInput(message).strip()
    main(courseName)