potluck.tests.test_eval

Tests of the potluck_eval script.
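These tests exercise the installed potluck_eval console script against the bundled test area. To run just this module (assuming a source checkout in which the potluck package is importable and the potluck_eval entry point is installed), something like the following should work:

    python -m pytest potluck/tests/test_eval.py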

test_eval.py

  1"""
  2Tests of the potluck_eval script.
  3
  4test_eval.py
  5"""
  6
  7import os
  8import json
  9import pathlib
 10import subprocess
 11
 12import pytest
 13import importlib_resources
 14
 15from .. import render
 16from .._version import __version__ as potluck_version
 17
 18# Where to import potluck from so that we're testing the same potluck...
 19# (Note: potluck_eval script is just what's installed...)
 20IMPORT_FROM = str(pathlib.Path(__file__).parent.parent.parent)
 21
 22# Expected strings in rubrics
 23# TODO: more detailed rubric expectations
 24RUBRIC_EXPECTS = {
 25    "debugTest": [ "<title>debugTest Rubric</title>" ],
 26    "interactiveTest": [ "<title>interactiveTest Rubric</title>" ],
 27    "sceneTest": [ "<title>sceneTest Rubric</title>" ],
 28    "functionsTest": [
 29        "<title>functionsTest Rubric</title>",
 30        "<h1>Rubric for functionsTest</h1>",
 31        "All functions are documented",
 32        "Define <code>indentMessage</code>",
 33        (
 34            "The <code>polygon</code> function must maintain"
 35            " invariants for the <code>position</code> and"
 36            " <code>heading</code> values"
 37        ),
 38        "<code>ellipseArea</code> must return the correct result",
 39    ],
 40    "freedomTest": [ "<title>freedomTest Rubric</title>" ],
 41    "snippetsTest": [
 42        "<title>snippetsTest Rubric</title>",
 43        "<h1>Rubric for snippetsTest</h1>",
 44        "processData",
 45        "must return the correct result",
 46        "process.py",
 47        "must exhibit the correct behavior"
 48    ],
 49    "filesTest": [ "<title>filesTest Rubric</title>" ],
 50    "varsTest": [ "<title>varsTest Rubric</title>" ],
 51}
 52
 53# Expectations about reports
 54REPORT_EXPECTS = {
 55    "functionsTest": {
 56        "perfect": { "evaluation": "excellent" },
 57        "imperfect": { "evaluation": "partially complete" },
 58    },
 59    "debugTest": {
 60        "perfect": { "evaluation": "excellent" },
 61        "imperfect": { "evaluation": "incomplete" },
 62    },
 63    "sceneTest": {
 64        "perfect": { "evaluation": "excellent" },
 65        "imperfect": { "evaluation": "partially complete" },
 66    },
 67    "interactiveTest": {
 68        "perfect": { "evaluation": "excellent" },
 69        "imperfect": { "evaluation": "partially complete" },
 70    },
 71    "freedomTest": {
 72        "perfect": { "evaluation": "excellent" },
 73        "imperfect": { "evaluation": "partially complete" },
 74    },
 75    "snippetsTest": {
 76        "perfect": { "evaluation": "excellent" },
 77        "imperfect": { "evaluation": "partially complete" },
 78    },
 79    "filesTest": {
 80        "perfect": { "evaluation": "excellent" },
 81        "imperfect": { "evaluation": "partially complete" },
 82    },
 83    "varsTest": {
 84        "perfect": { "evaluation": "excellent" },
 85        "imperfect": { "evaluation": "partially complete" },
 86    }
 87}
 88
 89# Expectations about validation reports
 90VALIDATION_EXPECTS = {
 91    "functionsTest": {
 92        "perfect": { "evaluation": "excellent" },
 93        "imperfect": { "evaluation": "partially complete" },
 94    },
 95    "synthTest": {
 96        "perfect": { "evaluation": "excellent" },
 97        "imperfect": { "evaluation": "partially complete" },
 98    }
 99}
100
101# TODO: Expectations for instructions and for snippets!
102
103
104@pytest.fixture(
105    params=[
106        "functionsTest",
107        "debugTest",
108        "interactiveTest",
109        "sceneTest",
110        "freedomTest",
111        "snippetsTest",
112        "filesTest",
113        "varsTest",
114    ]
115)
116def taskid(request):
117    """
118    Parameterized fixture that provides a task ID string.
119    """
120    return request.param
121
122
123@pytest.fixture(params=["perfect", "imperfect"])
124def username(request):
125    """
126    Parameterized fixture that provides a username string.
127    """
128    return request.param
129
130
131@pytest.fixture
132def in_evaldir():
133    """
134    Sets the current directory to the testarea evaluation directory.
135    Yields that directory as a pathlib.Path.
136    """
137    if (
138        hasattr(importlib_resources, "files")
139    and hasattr(importlib_resources, "as_file")
140    ):
141        # For newer versions of importlib_resources
142        taPath = importlib_resources.files("potluck").joinpath("testarea")
143        with importlib_resources.as_file(taPath) as testarea:
144            evaldir = testarea / "test_course" / "fall2021"
145            old_dir = os.getcwd()
146            os.chdir(evaldir)
147            yield evaldir
148            os.chdir(old_dir)
149    else:
150        with importlib_resources.path("potluck", "testarea") as testarea:
151            evaldir = testarea / "test_course" / "fall2021"
152            old_dir = os.getcwd()
153            os.chdir(evaldir)
154            yield evaldir
155            os.chdir(old_dir)
156
157
158@pytest.fixture
159def logfile():
160    """
161    A fixture that yields a log filename and removes that file after the
162    test is complete. The test must create the file.
163    """
164    result = pathlib.Path("logs", "pytest.log")
165    yield result
166    try:
167        result.unlink()
168    except Exception:
169        pass
170
171
172@pytest.fixture
173def rubricfile(taskid):
174    """
175    A fixture that yields a rubric filename and removes that file after
176    the test is complete. The test must create the file.
177    """
178    result = pathlib.Path("rubrics", f"rubric-{taskid}.html")
179    yield result
180    try:
181        result.unlink()
182    except Exception:
183        pass
184
185
@pytest.fixture
def reportfiles(taskid, username):
    """
    A fixture that yields a pair of report JSON and HTML filenames and
    removes those files after the test is complete. The test must create
    the files.
    """
    r_json = pathlib.Path("reports", f"pytest-{username}-{taskid}.json")
    r_html = r_json.with_suffix(".html")
    yield (r_json, r_html)
    try:
        r_json.unlink()
    except Exception:
        pass
    try:
        r_html.unlink()
    except Exception:
        pass


@pytest.fixture
def validationreportfiles(taskid, username):
    """
    A fixture that yields a pair of validation report JSON and HTML
    filenames and removes those files after the test is complete. The
    test must create the files.
    """
    r_json = pathlib.Path(
        "reports",
        f"pytest-{username}-{taskid}-validation.json"
    )
    r_html = r_json.with_suffix(".html")
    yield (r_json, r_html)
    try:
        r_json.unlink()
    except Exception:
        pass
    try:
        r_html.unlink()
    except Exception:
        pass

def check_log_is_clean(logfile):
    """
    Helper that checks for a clean log file: the file must exist, its
    first line must be the potluck version banner, it must contain
    render.DONE_MSG, and it must not contain render.ERROR_MSG.
    """
    assert logfile.is_file()
    with logfile.open() as fin:
        log = fin.read()
    assert log.splitlines()[0] == (
        f"This is potluck version {potluck_version}"
    )
    assert render.ERROR_MSG not in log
    assert render.DONE_MSG in log

def test_rubric_creation(in_evaldir, taskid, logfile, rubricfile):
    """
    Tests rubric creation for a particular task.
    """
    assert not logfile.exists()
    assert not rubricfile.exists()
    result = subprocess.run(
        [
            "potluck_eval",
            "--import-from", IMPORT_FROM,
            "-t", taskid,
            "--rubric",
            "--log", str(logfile)
        ]
    )
    assert result.returncode == 0
    check_log_is_clean(logfile)

    assert rubricfile.is_file()

    # Look for expected strings in created rubric
    if taskid in RUBRIC_EXPECTS:
        with rubricfile.open() as fin:
            contents = fin.read()

        for expected in RUBRIC_EXPECTS[taskid]:
            assert expected in contents

def test_evaluation(in_evaldir, taskid, username, reportfiles, logfile):
    """
    Tests the potluck_eval script for a certain task/user example.
    """
    assert not logfile.exists()
    r_json, r_html = reportfiles
    assert not r_json.exists()
    assert not r_html.exists()
    result = subprocess.run(
        [
            "potluck_eval",
            "--import-from", IMPORT_FROM,
            "-t", taskid,
            "-u", username,
            "--log", str(logfile),
            "--outfile", str(r_json)
        ]
    )
    assert result.returncode == 0
    check_log_is_clean(logfile)

    assert r_json.is_file()
    assert r_html.is_file()

    with r_json.open() as fin:
        report = json.load(fin)

    if taskid in REPORT_EXPECTS:
        if username in REPORT_EXPECTS[taskid]:
            expectations = REPORT_EXPECTS[taskid][username]
            for key in expectations:
                assert key in report
                assert report[key] == expectations[key], (taskid, username)

def test_specifications_checks(in_evaldir, taskid, logfile):
    """
    A meta-meta test that runs the built-in specification checks on the
    example specifications to make sure they pass cleanly.
    """
    assert not logfile.exists()
    result = subprocess.run(
        [
            "potluck_eval",
            "--import-from", IMPORT_FROM,
            "-t", taskid,
            "--check",
            "--log", str(logfile)
        ]
    )
    assert result.returncode == 0
    check_log_is_clean(logfile)

    # Look for expected strings in the log file
    with logfile.open() as fin:
        log = fin.read()

    assert "All examples met expectations." in log
    assert "Check of solution code passed." in log

def test_validation(
    in_evaldir,
    taskid,
    username,
    validationreportfiles,
    logfile
):
    """
    Tests the potluck_eval script validation mode for a certain
    task/user example.
    """
    # Skip this test if there aren't any expectations for it: not all
    # tasks can be validated (they may not define any validation goals).
    if taskid not in VALIDATION_EXPECTS:
        return
    assert not logfile.exists()
    r_json, r_html = validationreportfiles
    assert not r_json.exists()
    assert not r_html.exists()
    result = subprocess.run(
        [
            "potluck_eval",
            "--validate",
            "--import-from", IMPORT_FROM,
            "-t", taskid,
            "-u", username,
            "--log", str(logfile),
            "--outfile", str(r_json)
        ]
    )
    assert result.returncode == 0
    check_log_is_clean(logfile)

    assert r_json.is_file()
    assert r_html.is_file()

    with r_json.open() as fin:
        report = json.load(fin)

    if taskid in VALIDATION_EXPECTS:
        if username in VALIDATION_EXPECTS[taskid]:
            expectations = VALIDATION_EXPECTS[taskid][username]
            for key in expectations:
                assert key in report
                assert report[key] == expectations[key]
@pytest.fixture(params=['functionsTest', 'debugTest', 'interactiveTest', 'sceneTest', 'freedomTest', 'snippetsTest', 'filesTest', 'varsTest'])
def taskid(request):

Parameterized fixture that provides a task ID string.

@pytest.fixture(params=['perfect', 'imperfect'])
def username(request):

Parameterized fixture that provides a username string.
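Because taskid and username are both parameterized, any test that requests them runs once per combination (8 task IDs x 2 usernames = 16 runs). A minimal sketch, not part of this module, showing the mechanism:

    def test_matrix_example(taskid, username):
        # pytest generates one run of this test per (taskid, username)
        # pair; request.param supplies the values via the fixtures above.
        assert username in ("perfect", "imperfect")
        assert isinstance(taskid, str)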

@pytest.fixture
def in_evaldir():

Sets the current directory to the testarea evaluation directory. Yields that directory as a pathlib.Path.
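While this fixture is active, the working directory is the evaluation directory, so relative paths used by the tests (and by the potluck_eval subprocess they launch), such as logs/pytest.log or rubrics/rubric-<taskid>.html, resolve inside it. A minimal sketch, assuming the module's imports:

    def test_cwd_example(in_evaldir):
        # The evaluation directory is <testarea>/test_course/fall2021.
        assert pathlib.Path.cwd().samefile(in_evaldir)

The chdir back to the previous directory happens during fixture teardown, after the test finishes.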

@pytest.fixture
def logfile():

A fixture that yields a log filename and removes that file after the test is complete. The test must create the file.
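This fixture and the file fixtures below all share the same yield-fixture shape: hand the test a path, let the test (or the script it runs) create the file, and clean up on a best-effort basis afterwards. A generic sketch of the pattern, with an illustrative fixture name and path:

    @pytest.fixture
    def artifact_path():
        result = pathlib.Path("output", "example.txt")  # illustrative path
        yield result         # the test is responsible for creating the file
        try:
            result.unlink()  # teardown: remove the file if it exists
        except Exception:
            pass             # cleanup is best-effort; a missing file is fine

Swallowing the exception during teardown keeps cleanup from masking the real failure when a test never created the file.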

@pytest.fixture
def rubricfile(taskid):

A fixture that yields a rubric filename and removes that file after the test is complete. The test must create the file.

@pytest.fixture
def reportfiles(taskid, username):

A fixture that yields a pair of report JSON and HTML filenames and removes those files after the test is complete. The test must create the files.

@pytest.fixture
def validationreportfiles(taskid, username):

A fixture that yields a pair of validation report JSON and HTML filenames and removes those files after the test is complete. The test must create the files.

def check_log_is_clean(logfile):

Helper that checks for a clean log file: the file must exist, its first line must be the potluck version banner, it must contain render.DONE_MSG, and it must not contain render.ERROR_MSG.

def test_rubric_creation(in_evaldir, taskid, logfile, rubricfile):

Tests rubric creation for a particular task.
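The test shells out to potluck_eval in rubric mode; the command it builds is roughly the following, where the angle-bracket values stand in for the fixture-supplied task ID and the checkout path in IMPORT_FROM:

    potluck_eval --import-from <potluck checkout> -t <taskid> --rubric --log logs/pytest.log

It then requires a zero exit status, a clean log, and a rubrics/rubric-<taskid>.html file containing the strings listed in RUBRIC_EXPECTS for that task.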

def test_evaluation(in_evaldir, taskid, username, reportfiles, logfile):

Tests the potluck_eval script for a certain task/user example.
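The command built here adds a submission user and an explicit output file; roughly (angle-bracket values are placeholders for the fixture-supplied task ID and username):

    potluck_eval --import-from <potluck checkout> -t <taskid> -u <username> --log logs/pytest.log --outfile reports/pytest-<username>-<taskid>.json

The test then requires a zero exit status, a clean log, both the JSON and HTML reports, and an "evaluation" value matching REPORT_EXPECTS (e.g. "excellent" for the perfect submissions).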

def test_specifications_checks(in_evaldir, taskid, logfile):

A meta-meta test that runs the built-in specification checks on the example specifications to make sure they pass cleanly.
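In --check mode the script checks the example specifications themselves; the invocation is roughly (placeholders as above):

    potluck_eval --import-from <potluck checkout> -t <taskid> --check --log logs/pytest.log

Besides a zero exit status and a clean log, the log must contain "All examples met expectations." and "Check of solution code passed."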

def test_validation(in_evaldir, taskid, username, validationreportfiles, logfile):

Tests the potluck_eval script validation mode for a certain task/user example.
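Validation mode only applies to tasks that define validation goals, so this test returns early unless the task appears in VALIDATION_EXPECTS. The invocation is roughly (placeholders as above):

    potluck_eval --validate --import-from <potluck checkout> -t <taskid> -u <username> --log logs/pytest.log --outfile reports/pytest-<username>-<taskid>-validation.json

As with test_evaluation, it requires a zero exit status, a clean log, both report files, and an "evaluation" value matching VALIDATION_EXPECTS.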