
Functions for loading submitted & solution code.


  7import importlib
  8import os
  9import sys
 10import types
 11import tempfile
 12import shutil
 13import base64
 14import mimetypes
 16import bs4
 18from . import mast
 19from . import logging
 20from . import render
 24# Setup #
 27def setup(specs_dir, sandbox_dir):
 28    """
 29    Sets the specifications and sandbox directories.
 30    """
 31    global SPECS_DIR, SANDBOX_DIR
 32    SPECS_DIR = specs_dir
 33    SANDBOX_DIR = sandbox_dir
 34    # Ensure sandboxes directory exists
 35    os.makedirs(SANDBOX_DIR, exist_ok=True)
 39# Loader functions #
 43def load_task_spec(task_info):
 44    """
 45    Loads a task specification module for the specified task. Returns the
 46    imported module. Augments the module with the following values:
 48    - taskid: The task ID for the task
 49    - base_path: the path to the spec file
 50    - soln_path: the path to the solution files directory
 51    - starter_path: the path to the starter files directory
 52    - starter_src: the source code for the main starter file
 53        (or an empty string if there is no starter file or if the
 54        task requires more than one file)
 55    - soln_files: all files/directories in the solution directory (not
 56        full paths)
 57    - starter_files: all files/directories in the starter directory (not
 58        full paths)
 59    - helper_files: a list of strings naming files/directories which are
 60        in the starter directory and the solution directory but which
 61        aren't the main task file (or directory) itself. These are just
 62        the file names, not the full paths.
 63    """
 64    # Set up sys.path and import specifically:
 65    # Note: Relevant directories will need __init__.py files!
 66    logging.log("Loading specification for '{}'".format(task_info['id']))
 67    spec_target = os.path.join(SPECS_DIR, task_info["id"], "spec.py")
 68    logging.log("    loading from: {}".format(spec_target))
 69    sys.path.insert(0, SPECS_DIR)
 70    try:
 71        spec = importlib.import_module(task_info["id"] + '.spec')
 72    except Exception:
 73        logging.log("Fatal error: Unable to load task specification.")
 74        logging.log_current_exception()
 75        raise
 76    sys.path.pop(0)
 78    # Augment imported module
 79    here = os.path.dirname(spec.__file__)
 80    spec.taskid = task_info["id"]
 81    spec.base_path = here
 82    spec.soln_path = os.path.join(here, 'soln')
 83    spec.starter_path = os.path.join(here, 'starter')
 84    starter_file = os.path.join(spec.starter_path, task_info["target"])
 85    if os.path.isfile(starter_file):
 86        with open(starter_file, encoding="utf-8") as fin:
 87            spec.starter_src = fin.read()
 88    else:
 89        spec.starter_src = ""
 91    spec.soln_files = os.listdir(spec.soln_path)
 92    if os.path.exists(spec.starter_path):
 93        spec.starter_files = os.listdir(spec.starter_path)
 94    else:
 95        spec.starter_files = []
 96    spec.helper_files = list(
 97        (set(spec.soln_files) & set(spec.starter_files))
 98      - set([task_info["target"]])
 99    )
101    logging.log("...done loading specification")
103    return spec
def import_soln(taskspec):
    '''
    Uses importlib to import the solution module for the given task. If
    the module has already been imported, reloads it.

    The given task specification must provide `soln_path` (the
    directory to import from) and `src` (the solution file name; a
    trailing '.py' is stripped to get the module name).

    Returns the imported module object.

    Fails if this task doesn't have a Python source file.
    '''
    # Only strip a trailing '.py'; a '.py' elsewhere in the name is
    # part of the name.
    module_name = taskspec.src
    if module_name.endswith('.py'):
        module_name = module_name[:-len('.py')]

    # Here we temporarily both change cwd *and* push it onto our sys.path.
    original_directory = os.getcwd()
    os.chdir(taskspec.soln_path)
    soln_dir = os.getcwd()
    sys.path.insert(0, soln_dir)
    try:
        if module_name in sys.modules:
            return importlib.reload(sys.modules[module_name])
        else:
            return importlib.import_module(module_name)
    finally:
        # Reset cwd and sys.path. We remove our own entry in place
        # (rather than rebinding sys.path to a slice) so that the
        # sys.path list object is preserved and so that we don't drop
        # the wrong entry if the solution code inserted one of its own.
        os.chdir(original_directory)
        try:
            sys.path.remove(soln_dir)
        except ValueError:
            pass
def load_instructions_html(spec):
    """
    Produces the HTML instructions for a specifications module. The
    markdown source is taken from "instructions.md" in the spec folder
    when that file exists; otherwise the docstring of the specs module
    is used, and if that is empty too, a placeholder message stands in.
    A message is logged saying which source was used.

    The markdown is converted to HTML, and then every img, audio, or
    video tag that has a src attribute is examined: if the src names a
    file relative to the spec folder, the file's contents are embedded
    directly into the HTML as a base64-encoded data: URL (with a
    guessed MIME type, falling back to "text/plain"). This lets the
    instructions be embedded in multiple contexts without copying
    resources alongside them, at the cost of a somewhat larger result.
    Resources that cannot be found are left untouched and a warning is
    logged. Other kinds of resources are not handled.

    Returns the resulting HTML as a string.
    """
    # Figure out where the markdown comes from
    md_path = os.path.join(spec.base_path, "instructions.md")
    if os.path.exists(md_path):
        with open(md_path, 'r', encoding="utf-8") as fin:
            markdown = fin.read()
        logging.log("Fetched instructions from '{}'...".format(md_path))
    elif spec.__doc__:
        # No instructions.md file, so fall back on the spec docstring
        markdown = spec.__doc__
        logging.log(
            "Fetched instructions from spec module's docstring..."
        )
    else:
        logging.log("Couldn't find any instructions...")
        markdown = "(no instructions available)"

    # Render to HTML and parse the result so that we can edit tags
    soup = bs4.BeautifulSoup(
        render.render_markdown(markdown),
        "html.parser"
    )

    def has_embeddable_src(tag):
        """
        Filter for media tags that point at a source.
        """
        return tag.name in ("img", "audio", "video") and tag.has_attr("src")

    for media in soup.find_all(has_embeddable_src):
        ref = media["src"]
        resource = os.path.join(spec.base_path, ref)

        if not os.path.isfile(resource):
            # Log a warning and leave the tag as it was
            logging.log(
                "Warning: resource '{}' was not found.".format(ref)
            )
            continue

        mime = mimetypes.guess_type(resource, strict=False)[0]
        if mime is None:
            mime = "text/plain"
        # TODO: Handle encoding guess!

        with open(resource, 'rb') as fin:
            payload = fin.read()
        encoded = base64.standard_b64encode(payload).decode("utf-8")

        # Swap the src attribute for a data URI holding the file itself
        media['src'] = "data:{};base64,{}".format(mime, encoded)
        logging.log("Embedded resource '{}'".format(ref))

    return str(soup)
203# Module loading & parsing #
def create_module_in_sandbox(
    node,
    filename,
    sandbox_dir=None,
    sandbox_links=None,
    sandbox_files=None,
    on_disk=None
):
    """
    Runs the code from an AST node inside a sandbox directory to build
    a module object (see `create_module_from_code`), and returns that
    module. The filename determines the module's name.

    When `sandbox_dir` names an already-prepared sandbox, the code is
    run there and `sandbox_links` and `sandbox_files` are ignored.
    Otherwise a fresh temporary sandbox is created inside the
    configured sandboxes directory, used for this one run, and then
    destroyed.

    For a fresh sandbox, extra files can be made available in two ways,
    each given as a dictionary mapping absolute paths to
    paths-in-sandbox: entries in `sandbox_links` are symlinked into the
    sandbox (see `link_mapping`), while entries in `sandbox_files` are
    copied, which is typically less efficient but desirable when those
    files will be modified.

    If on_disk is provided, it should be a full path to the file that
    the code was parsed from, and will be used to provide a __file__
    variable while the code runs.
    """
    # A ready-to-use sandbox needs no preparation at all
    if sandbox_dir is not None:
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=sandbox_dir
        )

    # Otherwise build a throwaway sandbox of our own
    with tempfile.TemporaryDirectory(
        suffix="__tmp",
        dir=SANDBOX_DIR
    ) as scratch:
        # Link in any requested files...
        if sandbox_links is not None:
            for outside, inside in sandbox_links.items():
                os.symlink(outside, os.path.join(scratch, inside))

        # ...and copy in any others
        if sandbox_files is not None:
            for outside, inside in sandbox_files.items():
                shutil.copy(outside, os.path.join(scratch, inside))

        # Run the code before the sandbox gets cleaned up
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=scratch
        )
def create_module_from_code(node, filename, on_disk=None, sandbox=None):
    """
    Given an AST node and a filename, creates a module object and
    registers it in sys.modules. The module name is the filename without
    any extension (.py or otherwise) and the module docstring is
    extracted from the given AST node if possible (i.e., when the first
    statement in the module body is a string constant); otherwise the
    docstring is an empty string.

    Note that the module is registered in sys.modules under its plain
    name, but its __name__ variable is set to that name plus ".py".

    If on_disk is provided, it should be a full path to the file that the
    code was parsed from, and will be used to provide a __file__
    variable while the code runs.

    If a sandbox is provided, it should be a string indicating the path
    to a directory which should be set as current and added to the front
    of sys.path while we execute the code. Both changes are undone
    afterwards, even if the code raises an exception.

    Returns the module object. Any exception raised while compiling or
    running the code propagates to the caller (in which case the
    partially-executed module remains registered in sys.modules).
    """
    import ast  # local import: only needed for docstring extraction

    module_name = os.path.splitext(filename)[0]

    # Compile the AST node into executable code
    bytecode = compile(
        node,
        module_name + ".py", # necessary to get __name__ correct
        "exec"
    )

    # Grab module docstring if it exists. ast.get_docstring only
    # accepts a leading string-constant expression statement, so things
    # like an initial `x = 5` assignment are not mistaken for a
    # docstring. clean=False keeps the raw string value.
    try:
        module_docstring = ast.get_docstring(node, clean=False) or ""
    except Exception:
        module_docstring = ""

    # Create a new module and insert it into sys.modules (must
    # happen before execution of the module code!)
    module = types.ModuleType(module_name, module_docstring)
    sys.modules[module_name] = module
    module.__dict__["__name__"] = module_name + ".py"
    module.__dict__["__file__"] = on_disk

    if sandbox is None:
        # Execute the code in the module's dictionary, which fleshes
        # out the module
        exec(bytecode, module.__dict__, module.__dict__)
    else:
        # If we've been given a sandbox directory, use it
        prev_dir = os.getcwd()
        os.chdir(sandbox)
        sys.path.insert(0, sandbox)
        try:
            # Execute the code in the module's dictionary, which fleshes
            # out the module
            exec(bytecode, module.__dict__, module.__dict__)
        finally:
            # Remove our own entry in place rather than rebinding
            # sys.path to a slice: this keeps the sys.path list object
            # intact and avoids dropping the wrong entry if the
            # executed code added one of its own at the front.
            try:
                sys.path.remove(sandbox)
            except ValueError:
                pass
            os.chdir(prev_dir)

    # Return our completed module
    return module
def fix_parse(codestring, filename, exn=None):
    '''
    Inherited from net.py in Codder.

    Tries to comment out lines with syntax errors to recover remaining
    code. Returns a tuple containing the (possibly edited) code string
    that was parsed, the AST object resulting from the parse, and a list
    of errors (Exception objects) encountered along the way. If it
    encounters an unrecoverable exception, it will return None in place
    of the AST object.

    This function is recursive, and if given an exception to work with
    (via `exn`, which must have a `lineno` attribute), it starts by
    commenting out relevant lines of the file before attempting to
    parse it again.

    If a syntax or indentation error persists on the same line as the
    given `exn` even after that line has been commented out, the error
    is raised rather than returned; recursive calls catch this and give
    up, returning None for the AST.
    '''
    try:
        # if parsing fails for any reason we'll reattempt based on the
        # error...
        if exn:
            # if we encountered an exception, comment out that line and
            # any previous lines that end with ':' or which are empty or
            # comments...
            eindex = exn.lineno - 1
            lines = codestring.split('\n')
            lines[eindex] = '## SYNTAX ERROR ## ' + lines[eindex]

            # Grab lines above too, back to the nearest line which doesn't
            # end in ':', not counting comments or blank lines. This
            # helps ensure that if our syntax error is the only statement
            # in a loop or conditional, that loop/conditional dies with
            # it. The scan runs all the way back to the first line of
            # the file (index 0) so that a block header there is
            # included.
            for i in range(eindex - 1, -1, -1):
                predline = lines[i].strip()
                if (
                    predline.endswith(':')
                 or predline.startswith('#')
                 or len(predline) == 0
                ):
                    lines[i] = '## SYNTAX ERROR BUDDY ## ' + lines[i]
                else:
                    break

            # Rebuild our code string with the new comments in place
            codestring = '\n'.join(lines)

        # Whether or not we just commented out some code, we'll try to
        # parse what we've got. An error here will throw us into one of
        # the except clauses below.
        tree = mast.parse(codestring, filename=filename)

        # Parsing at this level didn't encounter any errors, so our error
        # list will be empty. Whoever called us is responsible for adding
        # the error they encountered if they passed us an error to watch
        # out for.
        return (codestring, tree, [])

    except (mast.MastParseError, SyntaxError, IndentationError) as caught:
        # These are expected parsing errors that we're prepared to
        # address by commenting out code

        # If it's a MastParseError, process the trigger instead...
        error = caught
        if isinstance(error, mast.MastParseError):
            error = error.trigger

        if not isinstance(error, (SyntaxError, IndentationError)):
            # A MastParseError not triggered by a syntax/indentation error
            logging.log("'{}' is not a valid Python file".format(filename))
            return (codestring, None, [error])

        if exn and error.lineno == exn.lineno:
            # if it persists on the same line of code despite introducing
            # a comment, we give up
            raise error

        # Recurse to try to fix this new error
        try:
            fixed, tree, more_errors = fix_parse(
                codestring,
                filename,
                exn=error
            )
        except (SyntaxError, IndentationError) as unfixable:
            # give up if we couldn't fix it
            return (codestring, None, [exn] if exn else [unfixable])
        else:
            # If there isn't an exception, we can return the code
            # along with this error plus any other errors
            return (fixed, tree, [error] + more_errors)

    except TypeError as error:
        # Happens e.g., when the file is not a python file
        logging.log("'{}' is not a valid Python file".format(filename))
        return (codestring, None, [error])

    except Exception as error:
        logging.log(
            "Encountered unexpected exception when parsing '{}'"
            .format(filename)
        )
        logging.log_current_exception()
        # Report the failure using the documented return shape (None in
        # place of the AST) instead of falling off the end of the
        # function, which would hand callers a bare None that they
        # cannot unpack.
        return (codestring, None, [error])
