potluck.load

Functions for loading submitted & solution code.

load.py

  1"""
  2Functions for loading submitted & solution code.
  3
  4load.py
  5"""
  6
  7import importlib
  8import os
  9import sys
 10import types
 11import tempfile
 12import shutil
 13import base64
 14import mimetypes
 15
 16import bs4
 17
 18from . import mast
 19from . import logging
 20from . import render
 21
 22
 23#-------#
 24# Setup #
 25#-------#
 26
 27def setup(specs_dir, sandbox_dir):
 28    """
 29    Sets the specifications and sandbox directories.
 30    """
 31    global SPECS_DIR, SANDBOX_DIR
 32    SPECS_DIR = specs_dir
 33    SANDBOX_DIR = sandbox_dir
 34    # Ensure sandboxes directory exists
 35    os.makedirs(SANDBOX_DIR, exist_ok=True)
 36
 37
 38#------------------#
 39# Loader functions #
 40#------------------#
 41
 42
 43def load_task_spec(task_info):
 44    """
 45    Loads a task specification module for the specified task. Returns the
 46    imported module. Augments the module with the following values:
 47
 48    - taskid: The task ID for the task
 49    - base_path: the path to the spec file
 50    - soln_path: the path to the solution files directory
 51    - starter_path: the path to the starter files directory
 52    - starter_src: the source code for the main starter file
 53        (or an empty string if there is no starter file or if the
 54        task requires more than one file)
 55    - soln_files: all files/directories in the solution directory (not
 56        full paths)
 57    - starter_files: all files/directories in the starter directory (not
 58        full paths)
 59    - helper_files: a list of strings naming files/directories which are
 60        in the starter directory and the solution directory but which
 61        aren't the main task file (or directory) itself. These are just
 62        the file names, not the full paths.
 63    """
 64    # Set up sys.path and import specifically:
 65    # Note: Relevant directories will need __init__.py files!
 66    logging.log("Loading specification for '{}'".format(task_info['id']))
 67    spec_target = os.path.join(SPECS_DIR, task_info["id"], "spec.py")
 68    logging.log("    loading from: {}".format(spec_target))
 69    sys.path.insert(0, SPECS_DIR)
 70    try:
 71        spec = importlib.import_module(task_info["id"] + '.spec')
 72    except Exception:
 73        logging.log("Fatal error: Unable to load task specification.")
 74        logging.log_current_exception()
 75        raise
 76    sys.path.pop(0)
 77
 78    # Augment imported module
 79    here = os.path.dirname(spec.__file__)
 80    spec.taskid = task_info["id"]
 81    spec.base_path = here
 82    spec.soln_path = os.path.join(here, 'soln')
 83    spec.starter_path = os.path.join(here, 'starter')
 84    starter_file = os.path.join(spec.starter_path, task_info["target"])
 85    if os.path.isfile(starter_file):
 86        with open(starter_file, encoding="utf-8") as fin:
 87            spec.starter_src = fin.read()
 88    else:
 89        spec.starter_src = ""
 90
 91    spec.soln_files = os.listdir(spec.soln_path)
 92    if os.path.exists(spec.starter_path):
 93        spec.starter_files = os.listdir(spec.starter_path)
 94    else:
 95        spec.starter_files = []
 96    spec.helper_files = list(
 97        (set(spec.soln_files) & set(spec.starter_files))
 98      - set([task_info["target"]])
 99    )
100
101    logging.log("...done loading specification")
102
103    return spec
104
105
def import_soln(taskspec):
    '''
    Uses importlib to import the solution module for the given task. If
    the module has already been imported, reloads it.

    The module name is `taskspec.src` with a trailing '.py' removed, and
    the import happens with `taskspec.soln_path` as both the current
    directory and the first entry of sys.path. Both are restored before
    returning, even if the import raises.

    Returns the imported module object.

    Fails if this task doesn't have a Python source file.
    '''
    # Here we temporarily both change cwd *and* push it onto our sys.path.
    original_directory = os.getcwd()
    os.chdir(taskspec.soln_path)
    soln_dir = os.getcwd()
    sys.path.insert(0, soln_dir)
    try:
        # Strip only a trailing '.py'; str.replace would also mangle any
        # '.py' that appears elsewhere in the name.
        module_name = taskspec.src
        if module_name.endswith('.py'):
            module_name = module_name[:-len('.py')]
        if module_name in sys.modules:
            return importlib.reload(sys.modules[module_name])
        else:
            return importlib.import_module(module_name)
    finally:
        # Reset cwd and sys.path. We remove our own entry in place
        # instead of slicing, so sys.path stays the same list object and
        # we never drop an entry that the imported code inserted.
        os.chdir(original_directory)
        try:
            sys.path.remove(soln_dir)
        except ValueError:
            pass
129
130
def load_instructions_html(spec):
    """
    Produces the HTML instructions for a specifications module.

    The markdown source is "instructions.md" in the spec's `base_path`
    if that exists; otherwise the spec module's docstring if it has a
    non-empty one; otherwise a "(no instructions available)"
    placeholder. A message is logged saying which source was used. The
    markdown is converted via `render.render_markdown`.

    Afterwards, every img, audio, or video tag with a src attribute is
    inspected: if the src (joined onto the spec's `base_path`) names an
    existing file, the attribute is replaced by a base64-encoded data:
    URL holding that file's contents, so that the HTML can be embedded
    in multiple contexts without copying resources (at the cost of a
    larger result). Resources that aren't found are left alone and a
    warning is logged. Other kinds of resources are not handled.

    Returns the resulting HTML as a string.
    """
    md_path = os.path.join(spec.base_path, "instructions.md")
    if os.path.exists(md_path):
        with open(md_path, 'r', encoding="utf-8") as fin:
            instructions = fin.read()
        logging.log("Fetched instructions from '{}'...".format(md_path))
    elif spec.__doc__:
        # No instructions.md file: fall back on the spec's docstring
        instructions = spec.__doc__
        logging.log(
            "Fetched instructions from spec module's docstring..."
        )
    else:
        logging.log("Couldn't find any instructions...")
        instructions = "(no instructions available)"

    # Render to HTML and parse the result so we can edit tags
    soup = bs4.BeautifulSoup(
        render.render_markdown(instructions),
        "html.parser"
    )

    def embeddable(tag):
        """Selects img/audio/video tags that have a src attribute."""
        return (
            tag.name in ("img", "audio", "video")
        and tag.has_attr("src")
        )

    for element in soup.find_all(embeddable):
        resource = element["src"]
        resource_path = os.path.join(spec.base_path, resource)

        if not os.path.isfile(resource_path):
            # Just warn; the tag is left untouched
            logging.log(
                "Warning: resource '{}' was not found.".format(resource)
            )
            continue

        # Note: the guessed encoding is currently ignored
        mime, _encoding = mimetypes.guess_type(resource_path, strict=False)
        if mime is None:
            mime = "text/plain"

        with open(resource_path, 'rb') as fin:
            raw = fin.read()
        encoded = base64.standard_b64encode(raw).decode("utf-8")

        # Swap the original src for a data URI holding the file contents
        element['src'] = "data:{};base64,{}".format(mime, encoded)
        logging.log("Embedded resource '{}'".format(resource))

    return str(soup)
200
201
202#--------------------------#
203# Module loading & parsing #
204#--------------------------#
205
206
def create_module_in_sandbox(
    node,
    filename,
    sandbox_dir=None,
    sandbox_links=None,
    sandbox_files=None,
    on_disk=None
):
    """
    Runs the code from the given AST node inside a sandbox directory
    (via `create_module_from_code`) and returns the module object that
    results.

    If `sandbox_dir` is given, it is used as-is as the sandbox, and
    `sandbox_links` and `sandbox_files` are ignored. Otherwise a
    temporary directory is created inside `SANDBOX_DIR`, used as the
    sandbox, and destroyed again before this function returns.

    When a temporary sandbox is created, extra files can be placed in
    it first. `sandbox_links` is a dictionary mapping absolute paths to
    paths-in-sandbox; each one is symlinked into the sandbox.
    `sandbox_files` is a dictionary with the same structure whose
    entries are copied instead of linked, which is typically less
    efficient, but desirable if those files will be modified. Links are
    created before files are copied.

    If on_disk is provided, it should be a full path to the file that the
    code was parsed from, and will be used to provide a __file__
    variable while the code runs.
    """
    if sandbox_dir is not None:
        # Ready-made sandbox: nothing to set up, links/files are ignored
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=sandbox_dir
        )

    # No sandbox given, so build a throwaway one
    with tempfile.TemporaryDirectory(
        suffix="__tmp",
        dir=SANDBOX_DIR
    ) as tmpdir:
        # Symlink first, then copy; both mappings have the same shape
        for mapping, place in (
            (sandbox_links, os.symlink),
            (sandbox_files, shutil.copy),
        ):
            if mapping is None:
                continue
            for source in mapping:
                place(source, os.path.join(tmpdir, mapping[source]))

        # Run the code while the temporary directory still exists
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=tmpdir
        )
276
277
def create_module_from_code(node, filename, on_disk=None, sandbox=None):
    """
    Given an AST node and a filename, creates a module object and
    registers it in sys.modules. The module name is the filename without
    any extension (.py or otherwise) and the module docstring is
    extracted from the given AST node if possible (i.e., when the first
    statement in the module body is a string constant); otherwise the
    docstring is an empty string.

    Note that the module is registered in sys.modules under the bare
    module name, but its `__name__` is set to the module name plus
    ".py".

    If on_disk is provided, it should be a full path to the file that the
    code was parsed from, and will be used to provide a __file__
    variable while the code runs.

    If a sandbox is provided, it should be a string indicating the path
    to a directory which should be set as current and added to the front
    of sys.path while we execute the code. The previous directory and
    sys.path are restored afterwards, even if the code raises.

    Returns the module object. Exceptions raised by the code propagate
    to the caller.
    """
    import ast  # local import: only needed for docstring extraction

    module_name = os.path.splitext(filename)[0]

    # Compile the AST node into executable code
    bytecode = compile(
        node,
        module_name + ".py", # filename recorded in the code object
        "exec"
    )

    # Grab module docstring if it exists. ast.get_docstring only accepts
    # a leading string-constant expression, so a first statement like
    # `x = 5` is not mistaken for a docstring.
    try:
        module_docstring = ast.get_docstring(node, clean=False) or ""
    except Exception:
        module_docstring = ""

    # Create a new module and insert it into sys.modules (must
    # happen before execution of the module code!)
    module = types.ModuleType(module_name, module_docstring)
    sys.modules[module_name] = module
    module.__dict__["__name__"] = module_name + ".py"
    module.__dict__["__file__"] = on_disk

    if sandbox is None:
        # Execute the code in the module's dictionary, which fleshes
        # out the module
        exec(bytecode, module.__dict__, module.__dict__)
    else:
        # If we've been given a sandbox directory, use it
        prev_dir = os.getcwd()
        os.chdir(sandbox)
        sys.path.insert(0, sandbox)
        try:
            # Execute the code in the module's dictionary, which fleshes
            # out the module
            exec(bytecode, module.__dict__, module.__dict__)
        finally:
            # Remove our own entry in place rather than slicing: this
            # keeps sys.path the same list object and won't drop an
            # entry that the executed code put in front of ours.
            try:
                sys.path.remove(sandbox)
            except ValueError:
                pass
            os.chdir(prev_dir)

    # Return our completed module
    return module
335
336
def fix_parse(codestring, filename, exn=None):
    '''
    Inherited from net.py in Codder.

    Tries to comment out lines with syntax errors to recover remaining
    code. Returns a tuple containing the (possibly edited) code string
    that was parsed, the AST object resulting from the parse, and a list
    of errors (Exception objects) encountered along the way. If it
    encounters an unrecoverable exception, it will return None in place
    of the AST object.

    This function is recursive, and if given an exception to work with,
    it starts by commenting out relevant lines of the file before
    attempting to parse it again. `exn` is expected to have a `lineno`
    attribute (as SyntaxError and IndentationError do).

    A SyntaxError or IndentationError is raised (rather than returned)
    only when the error persists on the same line as `exn` after that
    line has been commented out; the enclosing recursive call catches
    it and reports failure via a None AST.
    '''
    try:
        # if parsing fails for any reason we'll reattempt based on the
        # error...
        if exn:
            # if we encountered an exception, comment out that line and
            # any previous lines that end with ':' or which are empty or
            # comments...
            lines = codestring.split('\n')
            # Clamp the index so that an error reported past the end of
            # the file (or at line 0) still maps onto a real line.
            eindex = min(max(exn.lineno - 1, 0), len(lines) - 1)
            lines[eindex] = '## SYNTAX ERROR ## ' + lines[eindex]

            # Grab lines above too, back to the nearest line which doesn't
            # end in ':', not counting comments or blank lines. This
            # helps ensure that if our syntax error is the only statement
            # in a loop or conditional, that loop/conditional dies with
            # it. The scan goes all the way to the first line (index 0),
            # since a block header may be the first line of the file.
            for i in range(eindex - 1, -1, -1):
                predline = lines[i].strip()
                if (
                    predline.endswith(':')
                 or predline.startswith('#')
                 or len(predline) == 0
                ):
                    lines[i] = '## SYNTAX ERROR BUDDY ## ' + lines[i]
                else:
                    break

            # Rebuild our code string with the new comments in place
            codestring = '\n'.join(lines)

        # Whether or not we just commented out some code, we'll try to
        # parse what we've got. An error here will throw us into one of
        # the except clauses below.
        tree = mast.parse(codestring, filename=filename)

        # Parsing at this level didn't encounter any errors, so our error
        # list will be empty. Whoever called us is responsible for adding
        # the error they encountered if they passed us an error to watch
        # out for.
        return (codestring, tree, [])

    except (mast.MastParseError, SyntaxError, IndentationError) as e:
        # These are expected parsing errors that we're prepared to
        # address by commenting out code

        # If it's a MastParseError, process the trigger instead...
        if isinstance(e, mast.MastParseError):
            e = e.trigger

        if not isinstance(e, (SyntaxError, IndentationError)):
            # A MastParseError not triggered by a syntax/indentation error
            logging.log("'{}' is not a valid Python file".format(filename))
            return (codestring, None, [e])

        if exn and e.lineno == exn.lineno:
            # if it persists on the same line of code despite introducing
            # a comment, we give up
            raise e

        # Recurse to try to fix this new error
        try:
            c, a, es = fix_parse(
                codestring,
                filename,
                exn=e
            )
        except (SyntaxError, IndentationError) as inner:
            # give up if we couldn't fix it
            return (codestring, None, [exn] if exn else [inner])
        else:
            # If there isn't an exception, we can return the code
            # along with this error plus any other errors
            return (c, a, [e] + es)

    except TypeError as e:
        # Happens e.g., when the file is not a python file
        logging.log("'{}' is not a valid Python file".format(filename))
        return (codestring, None, [e])

    except Exception as e:
        logging.log(
            "Encountered unexpected exception when parsing '{}'"
            .format(filename)
        )
        logging.log_current_exception()
        # Honor the documented contract: report the failure as a tuple
        # with None for the AST instead of implicitly returning None
        # (which would break callers that unpack the result).
        return (codestring, None, [e])
def setup(specs_dir, sandbox_dir):
28def setup(specs_dir, sandbox_dir):
29    """
30    Sets the specifications and sandbox directories.
31    """
32    global SPECS_DIR, SANDBOX_DIR
33    SPECS_DIR = specs_dir
34    SANDBOX_DIR = sandbox_dir
35    # Ensure sandboxes directory exists
36    os.makedirs(SANDBOX_DIR, exist_ok=True)

Sets the specifications and sandbox directories.

def load_task_spec(task_info):
    """
    Loads a task specification module for the specified task. Returns the
    imported module. Reads the "id" and "target" keys of `task_info`, and
    imports `<id>.spec` with `SPECS_DIR` temporarily placed at the front
    of `sys.path`. Augments the module with the following values:

    - taskid: The task ID for the task
    - base_path: the path to the directory containing the spec file
    - soln_path: the path to the solution files directory
    - starter_path: the path to the starter files directory
    - starter_src: the source code for the main starter file
        (or an empty string if the target isn't a regular file in the
        starter directory)
    - soln_files: all files/directories in the solution directory (not
        full paths)
    - starter_files: all files/directories in the starter directory (not
        full paths; empty if there is no starter directory)
    - helper_files: a list of strings naming files/directories which are
        in the starter directory and the solution directory but which
        aren't the main task file (or directory) itself. These are just
        the file names, not the full paths.

    If the import fails, the error is logged and re-raised. `sys.path`
    is restored whether or not the import succeeds. The solution
    directory is listed unconditionally, so `os.listdir` will raise if
    it is missing.
    """
    # Set up sys.path and import specifically:
    # Note: Relevant directories will need __init__.py files!
    logging.log("Loading specification for '{}'".format(task_info['id']))
    spec_target = os.path.join(SPECS_DIR, task_info["id"], "spec.py")
    logging.log("    loading from: {}".format(spec_target))
    specs_dir = SPECS_DIR
    sys.path.insert(0, specs_dir)
    try:
        spec = importlib.import_module(task_info["id"] + '.spec')
    except Exception:
        logging.log("Fatal error: Unable to load task specification.")
        logging.log_current_exception()
        raise
    finally:
        # Undo our sys.path change even when the import fails, and remove
        # our own entry rather than whatever happens to be first (the
        # imported spec may have modified sys.path itself).
        try:
            sys.path.remove(specs_dir)
        except ValueError:
            pass

    # Augment imported module
    here = os.path.dirname(spec.__file__)
    spec.taskid = task_info["id"]
    spec.base_path = here
    spec.soln_path = os.path.join(here, 'soln')
    spec.starter_path = os.path.join(here, 'starter')
    starter_file = os.path.join(spec.starter_path, task_info["target"])
    if os.path.isfile(starter_file):
        with open(starter_file, encoding="utf-8") as fin:
            spec.starter_src = fin.read()
    else:
        spec.starter_src = ""

    spec.soln_files = os.listdir(spec.soln_path)
    if os.path.exists(spec.starter_path):
        spec.starter_files = os.listdir(spec.starter_path)
    else:
        spec.starter_files = []
    # Helpers are shared between starter and solution, minus the target
    spec.helper_files = list(
        (set(spec.soln_files) & set(spec.starter_files))
      - {task_info["target"]}
    )

    logging.log("...done loading specification")

    return spec

Loads a task specification module for the specified task. Returns the imported module. Augments the module with the following values:

  • taskid: The task ID for the task
  • base_path: the path to the spec file
  • soln_path: the path to the solution files directory
  • starter_path: the path to the starter files directory
  • starter_src: the source code for the main starter file (or an empty string if there is no starter file or if the task requires more than one file)
  • soln_files: all files/directories in the solution directory (not full paths)
  • starter_files: all files/directories in the starter directory (not full paths)
  • helper_files: a list of strings naming files/directories which are in the starter directory and the solution directory but which aren't the main task file (or directory) itself. These are just the file names, not the full paths.
def import_soln(taskspec):
    '''
    Uses importlib to import the solution module for the given task. If
    the module has already been imported, reloads it.

    The module name is `taskspec.src` with a trailing '.py' removed, and
    the import happens with `taskspec.soln_path` as both the current
    directory and the first entry of sys.path. Both are restored before
    returning, even if the import raises.

    Returns the imported module object.

    Fails if this task doesn't have a Python source file.
    '''
    # Here we temporarily both change cwd *and* push it onto our sys.path.
    original_directory = os.getcwd()
    os.chdir(taskspec.soln_path)
    soln_dir = os.getcwd()
    sys.path.insert(0, soln_dir)
    try:
        # Strip only a trailing '.py'; str.replace would also mangle any
        # '.py' that appears elsewhere in the name.
        module_name = taskspec.src
        if module_name.endswith('.py'):
            module_name = module_name[:-len('.py')]
        if module_name in sys.modules:
            return importlib.reload(sys.modules[module_name])
        else:
            return importlib.import_module(module_name)
    finally:
        # Reset cwd and sys.path. We remove our own entry in place
        # instead of slicing, so sys.path stays the same list object and
        # we never drop an entry that the imported code inserted.
        os.chdir(original_directory)
        try:
            sys.path.remove(soln_dir)
        except ValueError:
            pass

Uses importlib to import the solution module for the given task. If the module has already been imported, reloads it.

Returns the imported module object.

Fails if this task doesn't have a Python source file.

def load_instructions_html(spec):
    """
    Produces the HTML instructions for a specifications module.

    The markdown source is "instructions.md" in the spec's `base_path`
    if that exists; otherwise the spec module's docstring if it has a
    non-empty one; otherwise a "(no instructions available)"
    placeholder. A message is logged saying which source was used. The
    markdown is converted via `render.render_markdown`.

    Afterwards, every img, audio, or video tag with a src attribute is
    inspected: if the src (joined onto the spec's `base_path`) names an
    existing file, the attribute is replaced by a base64-encoded data:
    URL holding that file's contents, so that the HTML can be embedded
    in multiple contexts without copying resources (at the cost of a
    larger result). Resources that aren't found are left alone and a
    warning is logged. Other kinds of resources are not handled.

    Returns the resulting HTML as a string.
    """
    md_path = os.path.join(spec.base_path, "instructions.md")
    if os.path.exists(md_path):
        with open(md_path, 'r', encoding="utf-8") as fin:
            instructions = fin.read()
        logging.log("Fetched instructions from '{}'...".format(md_path))
    elif spec.__doc__:
        # No instructions.md file: fall back on the spec's docstring
        instructions = spec.__doc__
        logging.log(
            "Fetched instructions from spec module's docstring..."
        )
    else:
        logging.log("Couldn't find any instructions...")
        instructions = "(no instructions available)"

    # Render to HTML and parse the result so we can edit tags
    soup = bs4.BeautifulSoup(
        render.render_markdown(instructions),
        "html.parser"
    )

    def embeddable(tag):
        """Selects img/audio/video tags that have a src attribute."""
        return (
            tag.name in ("img", "audio", "video")
        and tag.has_attr("src")
        )

    for element in soup.find_all(embeddable):
        resource = element["src"]
        resource_path = os.path.join(spec.base_path, resource)

        if not os.path.isfile(resource_path):
            # Just warn; the tag is left untouched
            logging.log(
                "Warning: resource '{}' was not found.".format(resource)
            )
            continue

        # Note: the guessed encoding is currently ignored
        mime, _encoding = mimetypes.guess_type(resource_path, strict=False)
        if mime is None:
            mime = "text/plain"

        with open(resource_path, 'rb') as fin:
            raw = fin.read()
        encoded = base64.standard_b64encode(raw).decode("utf-8")

        # Swap the original src for a data URI holding the file contents
        element['src'] = "data:{};base64,{}".format(mime, encoded)
        logging.log("Embedded resource '{}'".format(resource))

    return str(soup)

Given a specifications module, loads the instructions for that module and converts them from markdown to HTML. This loads "instructions.md" in the spec folder, or if it's not present, the docstring of the specs module. Logs a message about where it fetched the instructions from.

Note that the instructions may need resources copied with them in order to render properly, but this function doesn't handle that completely. What it does do is look for resources it knows how to handle (img, audio, and video tags) and embed their data into the HTML result as base64-encoded data: URLs. This makes it easier for the instructions to be embedded in multiple contexts, although it does increase their overall size a bit.

def create_module_in_sandbox(
    node,
    filename,
    sandbox_dir=None,
    sandbox_links=None,
    sandbox_files=None,
    on_disk=None
):
    """
    Runs the code from the given AST node inside a sandbox directory
    (via `create_module_from_code`) and returns the module object that
    results.

    If `sandbox_dir` is given, it is used as-is as the sandbox, and
    `sandbox_links` and `sandbox_files` are ignored. Otherwise a
    temporary directory is created inside `SANDBOX_DIR`, used as the
    sandbox, and destroyed again before this function returns.

    When a temporary sandbox is created, extra files can be placed in
    it first. `sandbox_links` is a dictionary mapping absolute paths to
    paths-in-sandbox; each one is symlinked into the sandbox.
    `sandbox_files` is a dictionary with the same structure whose
    entries are copied instead of linked, which is typically less
    efficient, but desirable if those files will be modified. Links are
    created before files are copied.

    If on_disk is provided, it should be a full path to the file that the
    code was parsed from, and will be used to provide a __file__
    variable while the code runs.
    """
    if sandbox_dir is not None:
        # Ready-made sandbox: nothing to set up, links/files are ignored
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=sandbox_dir
        )

    # No sandbox given, so build a throwaway one
    with tempfile.TemporaryDirectory(
        suffix="__tmp",
        dir=SANDBOX_DIR
    ) as tmpdir:
        # Symlink first, then copy; both mappings have the same shape
        for mapping, place in (
            (sandbox_links, os.symlink),
            (sandbox_files, shutil.copy),
        ):
            if mapping is None:
                continue
            for source in mapping:
                place(source, os.path.join(tmpdir, mapping[source]))

        # Run the code while the temporary directory still exists
        return create_module_from_code(
            node,
            filename,
            on_disk=on_disk,
            sandbox=tmpdir
        )

Given an AST node and a filename, creates a temporary sandbox directory, runs the code in the sandbox to create a module object, and returns the module object that was created.

An explicit sandbox directory that's already set up may be provided via the sandbox_dir parameter. If none is provided, a new sandbox will be created and then destroyed in the course of running this function. If an existing sandbox will be used, sandbox_links and sandbox_files are ignored.

If a pre-existing sandbox isn't provided and extra files are needed in the sandbox, a dictionary mapping absolute paths to paths-in-sandbox can be supplied and those files will be symlinked in (see link_mapping). Alternatively, an equivalently-structured sandbox_files dictionary may be supplied to copy files rather than creating links, which is typically less efficient, but desirable if those files will be modified.

If on_disk is provided, it should be a full path to the file that the code was parsed from, and will be used to provide a __file__ variable while the code runs.

def create_module_from_code(node, filename, on_disk=None, sandbox=None):
    """
    Given an AST node and a filename, creates a module object and
    registers it in sys.modules. The module name is the filename without
    any extension (.py or otherwise) and the module docstring is
    extracted from the given AST node if possible (i.e., when the first
    statement in the module body is a string constant); otherwise the
    docstring is an empty string.

    Note that the module is registered in sys.modules under the bare
    module name, but its `__name__` is set to the module name plus
    ".py".

    If on_disk is provided, it should be a full path to the file that the
    code was parsed from, and will be used to provide a __file__
    variable while the code runs.

    If a sandbox is provided, it should be a string indicating the path
    to a directory which should be set as current and added to the front
    of sys.path while we execute the code. The previous directory and
    sys.path are restored afterwards, even if the code raises.

    Returns the module object. Exceptions raised by the code propagate
    to the caller.
    """
    import ast  # local import: only needed for docstring extraction

    module_name = os.path.splitext(filename)[0]

    # Compile the AST node into executable code
    bytecode = compile(
        node,
        module_name + ".py", # filename recorded in the code object
        "exec"
    )

    # Grab module docstring if it exists. ast.get_docstring only accepts
    # a leading string-constant expression, so a first statement like
    # `x = 5` is not mistaken for a docstring.
    try:
        module_docstring = ast.get_docstring(node, clean=False) or ""
    except Exception:
        module_docstring = ""

    # Create a new module and insert it into sys.modules (must
    # happen before execution of the module code!)
    module = types.ModuleType(module_name, module_docstring)
    sys.modules[module_name] = module
    module.__dict__["__name__"] = module_name + ".py"
    module.__dict__["__file__"] = on_disk

    if sandbox is None:
        # Execute the code in the module's dictionary, which fleshes
        # out the module
        exec(bytecode, module.__dict__, module.__dict__)
    else:
        # If we've been given a sandbox directory, use it
        prev_dir = os.getcwd()
        os.chdir(sandbox)
        sys.path.insert(0, sandbox)
        try:
            # Execute the code in the module's dictionary, which fleshes
            # out the module
            exec(bytecode, module.__dict__, module.__dict__)
        finally:
            # Remove our own entry in place rather than slicing: this
            # keeps sys.path the same list object and won't drop an
            # entry that the executed code put in front of ours.
            try:
                sys.path.remove(sandbox)
            except ValueError:
                pass
            os.chdir(prev_dir)

    # Return our completed module
    return module

Given an AST node and a filename, creates a module object and registers it in sys.modules. The module name is the filename without any extension (.py or otherwise) and the module docstring is extracted from the given AST node if possible (i.e., when the first statement in the module body is a string constant).

If on_disk is provided, it should be a full path to the file that the code was parsed from, and will be used to provide a __file__ variable while the code runs.

If a sandbox is provided, it should be a string indicating the path to a directory which should be set as current and added to the front of sys.path while we execute the code.

def fix_parse(codestring, filename, exn=None):
def fix_parse(codestring, filename, exn=None):
    """
    Inherited from net.py in Codder.

    Tries to comment out lines with syntax errors to recover remaining
    code. Returns a tuple containing the (possibly edited) code string
    that was parsed, the AST object resulting from the parse, and a list
    of errors (Exception objects) encountered along the way. If it
    encounters an unrecoverable exception, it will return None in place
    of the AST object.

    This function is recursive, and if given an exception to work with,
    it starts by commenting out relevant lines of the file before
    attempting to parse it again.

    Parameters:

    - codestring: The source code to parse, as a single string.
    - filename: The file name passed to `mast.parse` and used in log
        messages.
    - exn: Optional exception with a `lineno` attribute identifying the
        line to comment out before parsing. The caller is responsible
        for recording this exception in its own error list; it is not
        added to the list returned from a successful parse.

    Raises a `SyntaxError` or `IndentationError` if `exn` was given and
    parsing fails again on the same line number even after that line
    has been commented out.
    """
    try:
        # if parsing fails for any reason we'll reattempt based on the
        # error...
        if exn is not None:
            # Comment out the offending line...
            eindex = exn.lineno - 1
            lines = codestring.split('\n')
            lines[eindex] = '## SYNTAX ERROR ## ' + lines[eindex]

            # ...and the lines above it too, back to the nearest line
            # which doesn't end in ':', not counting comments or blank
            # lines. This helps ensure that if our syntax error is the
            # only statement in a loop or conditional, that
            # loop/conditional dies with it. The scan runs all the way
            # to index 0 so that a block header on the very first line
            # of the file is handled as well.
            for i in range(eindex - 1, -1, -1):
                predline = lines[i].strip()
                if (
                    not predline
                 or predline.endswith(':')
                 or predline.startswith('#')
                ):
                    lines[i] = '## SYNTAX ERROR BUDDY ## ' + lines[i]
                else:
                    break

            # Rebuild our code string with the new comments in place
            codestring = '\n'.join(lines)

        # Whether or not we just commented out some code, we'll try to
        # parse what we've got. An error here will throw us into one of
        # the except clauses below.
        tree = mast.parse(codestring, filename=filename)

        # Parsing at this level didn't encounter any errors, so our error
        # list will be empty. Whoever called us is responsible for adding
        # the error they encountered if they passed us an error to watch
        # out for.
        return (codestring, tree, [])

    except (mast.MastParseError, SyntaxError, IndentationError) as e:
        # These are expected parsing errors that we're prepared to
        # address by commenting out code

        # If it's a MastParseError, process the trigger instead...
        if isinstance(e, mast.MastParseError):
            e = e.trigger

        if not isinstance(e, (SyntaxError, IndentationError)):
            # A MastParseError not triggered by a syntax/indentation error
            logging.log("'{}' is not a valid Python file".format(filename))
            return (codestring, None, [e])

        if exn is not None and e.lineno == exn.lineno:
            # if it persists on the same line of code despite introducing
            # a comment, we give up
            raise e

        # Recurse to try to fix this new error
        try:
            c, a, es = fix_parse(
                codestring,
                filename,
                exn=e
            )
        except (SyntaxError, IndentationError) as unfixable:
            # give up if we couldn't fix it
            return (
                codestring,
                None,
                [exn] if exn is not None else [unfixable]
            )
        else:
            # If there isn't an exception, we can return the code
            # along with this error plus any other errors
            return (c, a, [e] + es)

    except TypeError as e:
        # Happens e.g., when the file is not a python file
        logging.log("'{}' is not a valid Python file".format(filename))
        return (codestring, None, [e])

    except Exception as e:
        logging.log(
            "Encountered unexpected exception when parsing '{}'"
            .format(filename)
        )
        logging.log_current_exception()
        # Honor the documented contract (a 3-tuple with None in place of
        # the AST) rather than falling off the end of the function and
        # implicitly returning a bare None, which callers that unpack
        # the result cannot handle.
        return (codestring, None, [e])

    # Anything that is not an Exception subclass (e.g.,
    # KeyboardInterrupt) is not caught above and bubbles out.

Inherited from net.py in Codder.

Tries to comment out lines with syntax errors to recover remaining code. Returns a tuple containing the (possibly edited) code string that was parsed, the AST object resulting from the parse, and a list of errors (Exception objects) encountered along the way. If it encounters an unrecoverable exception, it will return None in place of the AST object.

This function is recursive, and if given an exception to work with, it starts by commenting out relevant lines of the file before attempting to parse it again.