potluck.validation
Machinery for defining requirements for tests. Tests are submitted in a
separate file using the `optimism` library; we can require a certain
number of distinct test cases that target specific functions/files, and
require that all of the checks succeed.

The validation machinery runs the submitted tests file in a directory
with the solution code and records which test cases it checks and whether
those checks succeed. `rubrics.Rubric.validate_tests` can then be used
to generate a report based on all validation goals; the goals in this
file should normally be used as validation goals, not evaluation goals.
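For illustration, a minimal sketch of what a submitted tests file might look like, assuming `optimism`'s `testFunction`/`case`/`checkReturnValue` interface (the module name `solution`, the function `process`, and the expected values are hypothetical):

```python
# tests.py -- sketch only; names and expected values are hypothetical
import optimism as opt

from solution import process   # hypothetical function under test

tester = opt.testFunction(process)   # test manager targeting 'process'

case = tester.case(3)                # establish a distinct test case
case.checkReturnValue(9)             # record a check for this case

case = tester.case(5)                # a second distinct case (different arguments)
case.checkReturnValue(25)
```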
1""" 2Machinery for defining requirements for tests. Tests are submitted in a 3separate file using the `optimism` library, and we can require a certain 4number of distinct test cases that target specific functions/files, and 5require that all of the checks succeed. 6 7The validation machinery runs the submitted tests file in a directory 8with the solution code and checks what test cases it checks and whether 9those checks succeed. `rubrics.Rubric.validate_tests` can then be used 10to generate a report based on all validation goals; the goals in this 11file should normally be used as validation goals, not evaluation goals. 12""" 13 14from . import rubrics 15from . import contexts 16from . import context_utils 17from . import phrasing 18from . import html_tools 19 20 21#--------------------------------------------------# 22# Goal subtypes for checking file-level test cases # 23#--------------------------------------------------# 24 25class CasesTest(rubrics.Goal): 26 """ 27 Runs a function against the auto-context for "validation_test_cases". 28 Inherit and override the `check` method with a function that accepts 29 a context and returns a goal evaluation result to define your test. 30 31 Note that these can only be used when the 'optimism' module is 32 available. 33 """ 34 def check(self, context): 35 """ 36 Not implemented; override to define specific tests. 37 """ 38 raise NotImplementedError( 39 "CasesTest is an abstract class that can't be used" 40 " directly." 41 ) 42 43 def __init__( 44 self, 45 taskid, 46 identifier, 47 description=( 48 "BLANK EXPECTATIONS TEST", 49 "THIS GOAL HAS NOT BEEN DEFINED" 50 ), 51 goal_type="testing", 52 uses_slots=("validation_test_cases",), 53 **kwargs 54 ): 55 """ 56 In addition to a task ID, an identifier, and a description, a 57 goal type may be supplied other than the default "testing". 58 59 The categorizer "tests:" will be prepended to the given 60 identifier. 61 62 The slots required should be given as uses_slots, and a relevant 63 context will be selected or created as the testing context. By 64 default the "validation_test_cases" slot is the only one used. 65 66 Any extra arguments are passed through to the `rubrics.Goal` 67 constructor. 68 """ 69 # Auto context dependency based on uses_slots 70 depends = contexts.auto(*uses_slots) 71 if len(depends) == 1: 72 test_context = depends[0] 73 else: 74 # TODO: De-duplicate stuff where one context actually 75 # provides everything needed via inheritance but auto 76 # doesn't see that? 77 test_context = contexts.Context( 78 description=( 79 "Test cases defined by your code", 80 ( 81 "The " + phrasing.comma_list( 82 slot.replace("_", " ") 83 for slot in uses_slots 84 ) 85 + " of your code." 86 ) 87 ), 88 builder=lambda ctx: ctx, 89 depends=depends 90 ) 91 92 if "test_in" not in kwargs: 93 kwargs["test_in"] = {} 94 if "contexts" not in kwargs["test_in"]: 95 kwargs["test_in"]["contexts"] = [ test_context ] 96 97 # Specified goal type 98 if "tags" not in kwargs: 99 kwargs["tags"] = {} 100 kwargs["tags"]["goal_type"] = goal_type 101 102 # Set up rubrics.Goal stuff 103 super().__init__( 104 taskid, 105 "tests:" + identifier, 106 description, 107 **kwargs 108 ) 109 110 # subgoals is inherited (no subgoals) 111 112 # table is inherited 113 114 def evaluate_in_context(self, context=None): 115 """ 116 Runs the checker and returns its result. 
117 """ 118 context = context or {} 119 120 try: 121 self.result = self.check(context) 122 123 if self.result is None: 124 raise ValueError( 125 f"Test case check for {self.__class__.__name__}" 126 f" returned None!" 127 ) 128 except Exception: 129 self.result = { 130 "status": "failed", 131 "traceback": html_tools.html_traceback( 132 linkable=context_utils.linkmap(context) 133 ) 134 } 135 self.set_explanation( 136 context, 137 status="crash", 138 default=html_tools.html_traceback( 139 title="Error while checking your test cases.", 140 linkable=context_utils.linkmap(context) 141 ) 142 ) 143 return self.result 144 145 self.set_explanation( 146 context, 147 default=self.result["explanation"] 148 ) 149 150 return self.result 151 152 153class DefinesEnoughTests(CasesTest): 154 """ 155 A test cases checker which ensures that for each of certain listed 156 functions (or files), a certain number of distinct test cases are 157 established (using the `optimism` module). 158 159 Note that functions are specified by name to be matched against 160 __name__ attributes of actual functions checked, so if you're testing 161 methods you just use the method name, and testing decorated functions 162 may be tricky. (TODO: Check if this plays nicely with spec-specified 163 decorations.) 164 165 Test cases are counted as distinct if either their arguments or their 166 provided inputs differ. 167 """ 168 def __init__(self, taskid, function_reqs, file_reqs, **kwargs): 169 """ 170 A task ID is required. The other required arguments are two 171 dictionaries mapping function name strings and then filename 172 strings to integers specifying how many tests are required. 173 174 Other arguments get passed through to `CasesTest` and 175 potentially thence to `rubrics.Goal`. 176 177 The identifier will be "defines_enough". 178 """ 179 self.function_reqs = function_reqs 180 self.file_reqs = file_reqs 181 182 # Check types for function requirements keys and values 183 for fname in function_reqs: 184 if not isinstance(fname, str): 185 raise TypeError( 186 ( 187 "Each function requirement must be a string." 188 " (You used {} as a key, which is a {})." 189 ).format( 190 repr(fname), 191 type(fname) 192 ) 193 ) 194 195 val = function_reqs[fname] 196 if not isinstance(val, int): 197 raise TypeError( 198 ( 199 "Each function requirement must use an integer" 200 " as the value. (requirement with key {} had" 201 " value {} which is a {})." 202 ).format( 203 repr(fname), 204 repr(val), 205 type(val) 206 ) 207 ) 208 209 # Check types for file requirements keys and values 210 for filename in file_reqs: 211 if not isinstance(filename, str): 212 raise TypeError( 213 ( 214 "Each file requirement must be a string." 215 " (You used {} as a key, which is a {})." 216 ).format( 217 repr(filename), 218 type(filename) 219 ) 220 ) 221 222 val = file_reqs[filename] 223 if not isinstance(val, int): 224 raise TypeError( 225 ( 226 "Each file requirement must use an integer as" 227 " the value. (requirement with key {} had" 228 " value {} which is a {})." 229 ).format( 230 repr(filename), 231 repr(val), 232 type(val) 233 ) 234 ) 235 236 # Check if optimism is available 237 try: 238 import optimism # noqa F401 239 except Exception: 240 raise NotImplementedError( 241 "DefinesEnoughTests cannot be used because the" 242 " 'optimism' module cannot be imported." 
243 ) 244 245 # Set automatic description 246 if "description" not in kwargs: 247 rlist = [ 248 "Function <code>{}</code>: {} cases".format( 249 fn, 250 required 251 ) 252 for fn, required in self.function_reqs.items() 253 ] + [ 254 "File '{}': {} cases".format( 255 filename, 256 required 257 ) 258 for filename, required in self.file_reqs.items() 259 ] 260 kwargs["description"] = ( 261 "Defines required test cases", 262 ( 263 """\ 264Your code must use the <code>optimism</code> module to create a certain 265number of test cases which use the following functions/files. Test cases 266that are the same as each other (same arguments and/or inputs) don't 267count. (Each test case must include at least one check).\n""" 268 + html_tools.build_list(rlist) 269 ) 270 ) 271 272 super().__init__(taskid, "defines_enough", **kwargs) 273 274 def check(self, context): 275 """ 276 Looks for an adequate number of established test cases in the 277 given context that have recorded checks. 278 """ 279 try: 280 import optimism 281 except Exception: 282 raise NotImplementedError( 283 "Cannot check for test cases because optimism cannot be" 284 " imported." 285 ) 286 cases = context_utils.extract(context, "validation_test_cases") 287 by_fn = {} 288 by_file = {} 289 for case in cases: 290 # Skip test cases that have not been checked 291 if len(case.outcomes) == 0: 292 continue 293 294 # Categorize by function/file tested 295 if issubclass(case.manager.case_type, optimism.FunctionCase): 296 fname = case.manager.target.__name__ 297 add_to = by_fn.setdefault(fname, []) 298 299 # Don't record duplicate cases 300 duplicate = False 301 for recorded in add_to: 302 if ( 303 case.args == recorded.args 304 and case.kwargs == recorded.kwargs 305 and case.inputs == recorded.inputs 306 ): 307 duplicate = True 308 break 309 310 # Record this case 311 if not duplicate: 312 add_to.append(case) 313 314 elif issubclass(case.manager.case_type, optimism.FileCase): 315 add_to = by_file.setdefault(case.manager.target, []) 316 317 # Don't record duplicate cases 318 duplicate = False 319 for recorded in add_to: 320 if ( 321 case.args == recorded.args 322 and case.kwargs == recorded.kwargs 323 and case.inputs == recorded.inputs 324 ): 325 duplicate = True 326 break 327 328 # Record this case 329 if not duplicate: 330 add_to.append(case) 331 332 # Note that we ignore other kinds of cases including block 333 # cases, which would be hard to count/require... 334 335 any_tests = False 336 deficient = False 337 reports = [] 338 for req_file, required in self.file_reqs.items(): 339 cases = by_file.get(req_file, []) 340 count = len(cases) 341 342 if count > 0: 343 any_tests = True 344 345 if count < required: 346 deficient = True 347 symbol = '✗' 348 else: 349 symbol = '✓' 350 351 reports.append( 352 f"{symbol} '{req_file}': {count} / {required}" 353 ) 354 355 for req_fn, required in self.function_reqs.items(): 356 cases = by_fn.get(req_fn, []) 357 count = len(cases) 358 359 if count > 0: 360 any_tests = True 361 362 if count < required: 363 deficient = True 364 symbol = '✗' 365 else: 366 symbol = '✓' 367 368 reports.append( 369 f"{symbol} <code>{req_fn}</code>: {count} / {required}" 370 ) 371 372 if not any_tests: 373 return { 374 "status": "failed", 375 "explanation": ( 376 "Running your module did not establish any test" 377 " cases for required functions or files." 
378 ) 379 } 380 elif deficient: 381 return { 382 "status": "partial", 383 "explanation": ( 384 "Your module did not establish as many test cases as" 385 " were required for all functions/files:\n" 386 ) + html_tools.build_list(reports) 387 } 388 else: 389 return { 390 "status": "accomplished", 391 "explanation": ( 392 "Your module established enough test cases for each" 393 " function or file it was required to test." 394 ) 395 } 396 397 398def list_case_outcomes(cases): 399 """ 400 Creates an HTML list out of test case objects. 401 """ 402 items = [] 403 for case in cases: 404 for (passed, tag, message) in case.outcomes: 405 short_tag = tag.split('/')[-1] 406 message = html_tools.escape(message) 407 lines = message.splitlines() 408 lines[0] = lines[0][:2] + lines[0].split('/')[-1] 409 message = html_tools.wrap_text_with_indentation( 410 '\n'.join(lines) 411 ) 412 items.append(f"✗ {short_tag}<br><pre>{message}</pre>") 413 return html_tools.build_list(items) 414 415 416class ChecksSucceed(CasesTest): 417 """ 418 An test case checker which ensures that each recorded outcome for 419 each established test case in the submitted testing module is a 420 success. 421 422 Note that when this goal is checked during validation, tests in the 423 "validation_test_cases" slot have been run against the solution 424 code, whereas when this goal is used during evaluation, those same 425 test cases have been run against the student's submitted code. 426 427 TODO: Manage multi-file submission and/or test file copying so that 428 "validation_test_cases" is actually available during evaluation. 429 """ 430 def __init__(self, taskid, **kwargs): 431 """ 432 A task ID is required. Arguments are passed through to 433 `CasesTest`. 434 435 The identifier will be "checks_succeeded". 436 """ 437 438 try: 439 import optimism # noqa F401 440 except Exception: 441 raise NotImplementedError( 442 "ChecksSucceed cannot be used because the 'optimism'" 443 " module cannot be imported." 444 ) 445 446 if "description" not in kwargs: 447 kwargs["description"] = ( 448 ( 449 "All checks must succeed" 450 ), 451 ( 452 "Every time your code checks a test case using the" 453 " <code>optimism</code> module the check must" 454 " succeed." 455 ) 456 ) 457 458 super().__init__(taskid, "checks_succeeded", **kwargs) 459 460 def check(self, context): 461 """ 462 Looks for any failed outcomes in test cases within the given 463 context. 464 """ 465 cases = context_utils.extract(context, "validation_test_cases") 466 any_failed = False 467 any_passed = False 468 failing = [] 469 for case in cases: 470 failed_here = False 471 for (succeeded, tag, msg) in case.outcomes: 472 if succeeded: 473 any_passed = True 474 else: 475 failed_here = True 476 477 if failed_here: 478 any_failed = True 479 failing.append(case) 480 481 if any_failed: 482 fail_list = list_case_outcomes(failing) 483 if any_passed: 484 return { 485 "status": "partial", 486 "explanation": ( 487 "Some of your code's checks failed:\n" 488 ) + fail_list 489 } 490 else: 491 return { 492 "status": "failed", 493 "explanation": ( 494 "None of your code's checks succeeded:\n" 495 ) + fail_list 496 } 497 else: 498 if any_passed: 499 return { 500 "status": "accomplished", 501 "explanation": ( 502 "All of your code's checks succeeded." 503 ) 504 } 505 else: 506 return { 507 "status": "failed", 508 "explanation": ( 509 "Your code did not check any test cases." 
510 ) 511 } 512 513 514#--------------------------------------------------# 515# Harnesses for checking function-level test cases # 516#--------------------------------------------------# 517 518def check_tests_harness( 519 function, 520 *args, 521 _req_cases=None, 522 _must_pass=True, 523 **kwargs 524): 525 """ 526 A test harness (to be used with 527 `potluck.specifications.test_with_harness`) which will return a 528 string reporting on the aggregate behavior of `optimism` tests that 529 were defined and checked as a result of running a particular 530 function. A minimum number of distinct `optimism` tests cases can be 531 required for each of certain target functions, and that those test 532 cases must pass all checks applied (this second check can be skipped 533 by setting `_must_pass` to `False`). 534 535 If `_must_pass` is set to the string "all", then all tests must 536 pass, even if more than the required number of tests are defined, 537 otherwise enough tests must pass (i.e., have been checked at least 538 once and have succeeded on every check applied) to meet the minimum 539 requirements, but cases beyond those are allowed to fail. If 540 `_must_pass` is set to the string "not all" then at least one test 541 must fail, but the specific number of successes/failures is not 542 reported. 543 544 Note that this function has a side effect of deleting all 545 previously-defined optimism tests. 546 547 The `_req_cases` argument must be a dictionary mapping function names 548 to integers specifying how many distinct tests are required for that 549 function. Tests for files can be required by prepending 'file:' to 550 the filename to require tests for, and code block tests can be 551 required by prepending 'block:' to the exact code block string (but 552 that's quite fragile). If `_req_cases` is None (the default) then 553 the report will include information on all defined tests. 554 555 As a harness function, most arguments are passed through to whatever 556 function is being tested; if that function has arguments named 557 `_req_cases` and/or `_must_pass` you'll have to define your own 558 custom harness that uses different keyword argument names. Because 559 positional arguments are passed through, these two meta-parameters 560 must be given as keyword arguments. 561 562 Note that technically, if the solution code has failing test cases, 563 when `_must_pass` is set to "all" the reports produced will be the 564 same if the submitted code fails the same number of test cases. 565 566 (Note: these docstring paragraphs will be used as the default goal 567 description...) 568 """ 569 # Check if optimism is available 570 try: 571 import optimism # noqa F401 572 except Exception: 573 raise NotImplementedError( 574 "check_tests_harness cannot be used because the" 575 " 'optimism' module cannot be imported." 576 ) 577 578 # First clean up any existing tests 579 optimism.deleteAllTestSuites() 580 581 # Run the function, ignoring its result 582 function(*args, **kwargs) 583 584 # List all currently defined test cases (i.e., those defined by the 585 # function we're looking at) 586 defined = optimism.listAllTrials() 587 588 report = "" 589 590 # Check each defined case and create a map of the number of passing 591 # and failing cases for each function/file/block tested; as a side 592 # effect add lines to the report detailing any failing cases if 593 # _must_pass is set to "all". 
594 caseMap = {} 595 for case in defined: 596 # Figure out the case ID 597 if isinstance(case.manager, optimism.FunctionManager): 598 case_id = case.manager.target.__name__ 599 show_case_id = "function:" + case_id 600 elif isinstance(case.manager, optimism.FileManager): 601 case_id = "file:" + case.manager.target 602 show_case_id = case_id 603 elif isinstance(case.manager, optimism.BlockManager): 604 case_id = "block:" + case.manager.target 605 show_case_id = "block:" + repr(case.manager.target) 606 else: 607 case_id = None 608 show_case_id = "unknown" 609 610 caseMap.setdefault(case_id, [show_case_id, 0, 0]) 611 612 # Go through each outcome 613 n_failed = 0 614 n_checks = 0 615 for passed, _, _ in case.outcomes: 616 n_checks += 1 617 if not passed: 618 n_failed += 1 619 620 if n_checks > 0 and n_failed == 0: 621 # All checks passed, and there was at least one 622 # This counts as a passing case 623 caseMap[case_id][1] += 1 624 625 elif n_failed > 0: 626 # some checks failed 627 # Record the failure 628 caseMap[case_id][2] += 1 629 if _must_pass == "all": 630 # Note failure in our report, but don't include specific 631 # line numbers, since those might differ between 632 # submitted and solution files 633 report += ( 634 f"{n_failed} checks failed for test(s) of" 635 f" {show_case_id}\n" 636 ) 637 638 # Check that the required number cases are present 639 if _req_cases is None: 640 # Report on every defined test 641 for (case_id, (show_case_id, succeeded, failed)) in caseMap.items(): 642 # Skip cases where no checks were performed 643 if succeeded + failed == 0: 644 continue 645 646 if _must_pass is True and succeeded == 0: 647 # if _must_pass is 'all' we've already reported failures 648 report += ( 649 f"{failed} {phrasing.plural(failed, 'check')} failed" 650 f" for test(s) of {show_case_id}\n" 651 ) 652 elif _must_pass: 653 # report success 654 report += ( 655 f"At least one check succeeded for test(s) of" 656 f" {show_case_id}\n" 657 ) 658 else: 659 # must_pass must be False, so we just report that checks 660 # were defined regardless of success/failure 661 report += ( 662 f"Performed at least one check for test(s) of" 663 f" {show_case_id}\n" 664 ) 665 else: 666 # Just report on required tests 667 for req, threshold in _req_cases.items(): 668 show_case_id, succeeded, failed = caseMap.get( 669 req, 670 [repr(req), 0, 0] 671 ) # TODO: More elegant here? 
672 if _must_pass: 673 if succeeded >= threshold: 674 cases_passed = phrasing.plural( 675 threshold, 676 'case passed', 677 'cases passed' 678 ) 679 report += ( 680 f"At least {threshold} {cases_passed} for" 681 f" test(s) of {show_case_id}\n" 682 ) 683 else: 684 cases_passed = phrasing.plural( 685 succeeded, 686 'case passed', 687 'cases passed' 688 ) 689 total = succeeded + failed 690 if total == succeeded: 691 cases_passed = phrasing.plural( 692 total, 693 'case was defined', 694 'cases were defined' 695 ) 696 only = "Only " if succeeded > 0 else "" 697 out_of = f"/{total}" if total > succeeded else "" 698 report += ( 699 f"{only}{succeeded}{out_of} {cases_passed} for" 700 f" test(s) of {show_case_id} ({threshold} were" 701 f" required)\n" 702 ) 703 else: 704 if succeeded + failed >= threshold: 705 cases_were = phrasing.plural( 706 threshold, 707 'case was', 708 'cases were' 709 ) 710 report += ( 711 f"At least {threshold} {cases_were} defined" 712 f" for {show_case_id}\n" 713 ) 714 else: 715 total = succeeded + failed 716 cases_were = phrasing.plural( 717 total, 718 'case was', 719 'cases were' 720 ) 721 only = "Only " if total > 0 else "" 722 report += ( 723 f"{only}{total} {cases_were} defined for" 724 f" {show_case_id} ({threshold}" 725 f" {phrasing.plural(threshold, 'was', 'were')}" 726 f" required)\n" 727 ) 728 729 # We return our report, to be compared with the same report when run 730 # against the solution code 731 return report 732 733 734def tests_report_description(target_fn, _req_cases=None, _must_pass=True): 735 """ 736 Returns a goal description tuple suitable for use with 737 `specifications.HasGoal.set_goal_description` when 738 `test_with_harness` has been used to set up `check_tests_harness` as 739 the testing harness. Pass the same target function and keyword 740 arguments used with the test harness (i.e., which were included in 741 the test case). 742 743 TODO: Option for generic version when multiple test cases are grouped? 744 """ 745 if _req_cases is None: 746 if _must_pass == "all": 747 return ( 748 ( 749 "Must define and successfully check" 750 " <code>optimism</code> test cases for the correct" 751 " functions." 752 ), 753 ( 754 "Your code must define and check" 755 " <code>optimism</code> test cases for each" 756 " function, file, or code block that the solution" 757 " code does. The number of test cases that fail at" 758 " least one check must match the solution results" 759 " (usually this means no check should fail)." 760 ) 761 ) 762 elif _must_pass is True: 763 return ( 764 ( 765 "Must define and check <code>optimism</code> test" 766 " cases for the correct functions." 767 ), 768 ( 769 "Your code must define and check" 770 " <code>optimism</code> test cases for each" 771 " function, file, or code block that the solution" 772 " code does. At least one check must succeed for" 773 " each test case defined by the solution code." 774 ) 775 ) 776 else: 777 return ( 778 ( 779 "Must define and check <code>optimism</code> test" 780 " cases for the correct functions." 781 ), 782 ( 783 "Your code must define and check" 784 " <code>optimism</code> test cases for each" 785 " function, file, or code block that the solution" 786 " code does. It does not matter if the checks" 787 " succeed or fail as long as at least one check is" 788 " performed per test case." 
789 ) 790 ) 791 else: 792 # Build a list of strings describing per-case-id requirements 793 checklist = [] 794 for req, threshold in _req_cases.items(): 795 if req.startswith('block:'): 796 show_case = ( 797 f"the code block <pre><code>{req[6:]}</code></pre>" 798 ) 799 elif req.startswith('file:'): 800 show_case = f"the file '{req[5:]}'" 801 else: 802 show_case = f"the function {req}" 803 804 if _must_pass: 805 checklist.append( 806 f"All checks must pass for at least {threshold}" 807 f" test {phrasing.plural(threshold, 'case')} for" 808 f" {show_case}." 809 ) 810 else: 811 checklist.append( 812 f"At least {threshold} test" 813 f" {phrasing.plural(threshold, 'case')} for" 814 f" {show_case} must be defined, and each must" 815 f" include at least one check (which does not have" 816 f" to succeed)." 817 ) 818 819 # Construct detail text 820 details = "" 821 if checklist: 822 details += ( 823 f"The following test case(s) must be established by your" 824 f" <code>{target_fn}</code> function and/or must" 825 f" succeed:" 826 ) 827 details += html_tools.build_list(checklist) 828 829 elif _must_pass != "all": 830 # If there are no listed checks, but _req_cases is not None, 831 # you'll need to craft a custom description yourself 832 raise ValueError( 833 "_req_cases did not include any required test cases. You" 834 " should fix that or use a custom description." 835 ) 836 837 if _must_pass == "all": 838 details += ( 839 "The same number of checks (usually zero) must fail for" 840 " the same test cases as the solution code." 841 ) 842 843 return ( 844 ( 845 f"Your <code>{target_fn}</code> function must establish" 846 f" the correct test cases." 847 ), 848 details 849 )
```python
class CasesTest(rubrics.Goal):
```
Runs a function against the auto-context for "validation_test_cases". Inherit and override the `check` method with a function that accepts a context and returns a goal evaluation result to define your test.
Note that these can only be used when the 'optimism' module is available.
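For illustration, a minimal subclass sketch; the goal name, identifier, and messages are hypothetical, and it relies only on the constructor arguments and result format documented below:

```python
from potluck import context_utils
from potluck.validation import CasesTest


class HasAnyCases(CasesTest):
    """
    Hypothetical goal: accomplished if the submitted tests file
    established at least one test case.
    """
    def __init__(self, taskid, **kwargs):
        super().__init__(
            taskid,
            "has_any_cases",   # identifier ("tests:" will be prepended)
            description=(
                "Defines at least one test case",
                "Your code must establish at least one test case."
            ),
            **kwargs
        )

    def check(self, context):
        # The default "validation_test_cases" slot supplies the cases.
        cases = context_utils.extract(context, "validation_test_cases")
        if cases:
            return {
                "status": "accomplished",
                "explanation": "Your code established at least one test case."
            }
        else:
            return {
                "status": "failed",
                "explanation": "Your code did not establish any test cases."
            }
```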
```python
    def __init__(
        self,
        taskid,
        identifier,
        description=(
            "BLANK EXPECTATIONS TEST",
            "THIS GOAL HAS NOT BEEN DEFINED"
        ),
        goal_type="testing",
        uses_slots=("validation_test_cases",),
        **kwargs
    ):
        # Auto context dependency based on uses_slots
        depends = contexts.auto(*uses_slots)
        if len(depends) == 1:
            test_context = depends[0]
        else:
            # TODO: De-duplicate stuff where one context actually
            # provides everything needed via inheritance but auto
            # doesn't see that?
            test_context = contexts.Context(
                description=(
                    "Test cases defined by your code",
                    (
                        "The " + phrasing.comma_list(
                            slot.replace("_", " ")
                            for slot in uses_slots
                        )
                        + " of your code."
                    )
                ),
                builder=lambda ctx: ctx,
                depends=depends
            )

        if "test_in" not in kwargs:
            kwargs["test_in"] = {}
        if "contexts" not in kwargs["test_in"]:
            kwargs["test_in"]["contexts"] = [ test_context ]

        # Specified goal type
        if "tags" not in kwargs:
            kwargs["tags"] = {}
        kwargs["tags"]["goal_type"] = goal_type

        # Set up rubrics.Goal stuff
        super().__init__(
            taskid,
            "tests:" + identifier,
            description,
            **kwargs
        )
```
In addition to a task ID, an identifier, and a description, a goal type may be supplied other than the default "testing".
The categorizer "tests:" will be prepended to the given identifier.
The slots required should be given as uses_slots, and a relevant context will be selected or created as the testing context. By default the "validation_test_cases" slot is the only one used.
Any extra arguments are passed through to the `rubrics.Goal` constructor.
```python
    def check(self, context):
        raise NotImplementedError(
            "CasesTest is an abstract class that can't be used"
            " directly."
        )
```
Not implemented; override to define specific tests.
```python
    def evaluate_in_context(self, context=None):
        context = context or {}

        try:
            self.result = self.check(context)

            if self.result is None:
                raise ValueError(
                    f"Test case check for {self.__class__.__name__}"
                    f" returned None!"
                )
        except Exception:
            self.result = {
                "status": "failed",
                "traceback": html_tools.html_traceback(
                    linkable=context_utils.linkmap(context)
                )
            }
            self.set_explanation(
                context,
                status="crash",
                default=html_tools.html_traceback(
                    title="Error while checking your test cases.",
                    linkable=context_utils.linkmap(context)
                )
            )
            return self.result

        self.set_explanation(
            context,
            default=self.result["explanation"]
        )

        return self.result
```
Runs the checker and returns its result.
```python
class DefinesEnoughTests(CasesTest):
```
A test-case checker which ensures that, for each of certain listed functions (or files), a certain number of distinct test cases are established (using the `optimism` module).
Note that functions are specified by name to be matched against __name__ attributes of actual functions checked, so if you're testing methods you just use the method name, and testing decorated functions may be tricky. (TODO: Check if this plays nicely with spec-specified decorations.)
Test cases are counted as distinct if either their arguments or their provided inputs differ.
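For instance, under this counting rule (and again assuming `optimism`'s `testFunction`/`case` interface), the sketch below establishes two distinct cases and one duplicate for the hypothetical function `double`:

```python
import optimism as opt


def double(x):   # hypothetical function under test
    return x * 2


tester = opt.testFunction(double)
tester.case(1).checkReturnValue(2)   # distinct case (arguments: 1)
tester.case(2).checkReturnValue(4)   # distinct case (arguments: 2)
tester.case(1).checkReturnValue(2)   # same arguments and inputs: counted as a duplicate
```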
```python
    def __init__(self, taskid, function_reqs, file_reqs, **kwargs):
        self.function_reqs = function_reqs
        self.file_reqs = file_reqs

        # Check types for function requirements keys and values
        for fname in function_reqs:
            if not isinstance(fname, str):
                raise TypeError(
                    (
                        "Each function requirement must be a string."
                        " (You used {} as a key, which is a {})."
                    ).format(
                        repr(fname),
                        type(fname)
                    )
                )

            val = function_reqs[fname]
            if not isinstance(val, int):
                raise TypeError(
                    (
                        "Each function requirement must use an integer"
                        " as the value. (requirement with key {} had"
                        " value {} which is a {})."
                    ).format(
                        repr(fname),
                        repr(val),
                        type(val)
                    )
                )

        # Check types for file requirements keys and values
        for filename in file_reqs:
            if not isinstance(filename, str):
                raise TypeError(
                    (
                        "Each file requirement must be a string."
                        " (You used {} as a key, which is a {})."
                    ).format(
                        repr(filename),
                        type(filename)
                    )
                )

            val = file_reqs[filename]
            if not isinstance(val, int):
                raise TypeError(
                    (
                        "Each file requirement must use an integer as"
                        " the value. (requirement with key {} had"
                        " value {} which is a {})."
                    ).format(
                        repr(filename),
                        repr(val),
                        type(val)
                    )
                )

        # Check if optimism is available
        try:
            import optimism  # noqa F401
        except Exception:
            raise NotImplementedError(
                "DefinesEnoughTests cannot be used because the"
                " 'optimism' module cannot be imported."
            )

        # Set automatic description
        if "description" not in kwargs:
            rlist = [
                "Function <code>{}</code>: {} cases".format(
                    fn,
                    required
                )
                for fn, required in self.function_reqs.items()
            ] + [
                "File '{}': {} cases".format(
                    filename,
                    required
                )
                for filename, required in self.file_reqs.items()
            ]
            kwargs["description"] = (
                "Defines required test cases",
                (
                    """\
Your code must use the <code>optimism</code> module to create a certain
number of test cases which use the following functions/files. Test cases
that are the same as each other (same arguments and/or inputs) don't
count. (Each test case must include at least one check).\n"""
                    + html_tools.build_list(rlist)
                )
            )

        super().__init__(taskid, "defines_enough", **kwargs)
```
A task ID is required. The other required arguments are two dictionaries mapping function name strings and then filename strings to integers specifying how many tests are required.
Other arguments get passed through to `CasesTest` and potentially thence to `rubrics.Goal`.
The identifier will be "defines_enough".
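A usage sketch based on the constructor signature above (the task ID, function name, filename, and counts are hypothetical):

```python
from potluck.validation import DefinesEnoughTests

# Require two distinct cases targeting the function 'process' and one case
# targeting the file 'data_report.py'.
goal = DefinesEnoughTests(
    "taskA",
    {"process": 2},          # function_reqs
    {"data_report.py": 1}    # file_reqs
)
```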
```python
    def check(self, context):
        try:
            import optimism
        except Exception:
            raise NotImplementedError(
                "Cannot check for test cases because optimism cannot be"
                " imported."
            )
        cases = context_utils.extract(context, "validation_test_cases")
        by_fn = {}
        by_file = {}
        for case in cases:
            # Skip test cases that have not been checked
            if len(case.outcomes) == 0:
                continue

            # Categorize by function/file tested
            if issubclass(case.manager.case_type, optimism.FunctionCase):
                fname = case.manager.target.__name__
                add_to = by_fn.setdefault(fname, [])

                # Don't record duplicate cases
                duplicate = False
                for recorded in add_to:
                    if (
                        case.args == recorded.args
                        and case.kwargs == recorded.kwargs
                        and case.inputs == recorded.inputs
                    ):
                        duplicate = True
                        break

                # Record this case
                if not duplicate:
                    add_to.append(case)

            elif issubclass(case.manager.case_type, optimism.FileCase):
                add_to = by_file.setdefault(case.manager.target, [])

                # Don't record duplicate cases
                duplicate = False
                for recorded in add_to:
                    if (
                        case.args == recorded.args
                        and case.kwargs == recorded.kwargs
                        and case.inputs == recorded.inputs
                    ):
                        duplicate = True
                        break

                # Record this case
                if not duplicate:
                    add_to.append(case)

            # Note that we ignore other kinds of cases including block
            # cases, which would be hard to count/require...

        any_tests = False
        deficient = False
        reports = []
        for req_file, required in self.file_reqs.items():
            cases = by_file.get(req_file, [])
            count = len(cases)

            if count > 0:
                any_tests = True

            if count < required:
                deficient = True
                symbol = '✗'
            else:
                symbol = '✓'

            reports.append(
                f"{symbol} '{req_file}': {count} / {required}"
            )

        for req_fn, required in self.function_reqs.items():
            cases = by_fn.get(req_fn, [])
            count = len(cases)

            if count > 0:
                any_tests = True

            if count < required:
                deficient = True
                symbol = '✗'
            else:
                symbol = '✓'

            reports.append(
                f"{symbol} <code>{req_fn}</code>: {count} / {required}"
            )

        if not any_tests:
            return {
                "status": "failed",
                "explanation": (
                    "Running your module did not establish any test"
                    " cases for required functions or files."
                )
            }
        elif deficient:
            return {
                "status": "partial",
                "explanation": (
                    "Your module did not establish as many test cases as"
                    " were required for all functions/files:\n"
                ) + html_tools.build_list(reports)
            }
        else:
            return {
                "status": "accomplished",
                "explanation": (
                    "Your module established enough test cases for each"
                    " function or file it was required to test."
                )
            }
```
Looks for an adequate number of established test cases in the given context that have recorded checks.
```python
def list_case_outcomes(cases):
    items = []
    for case in cases:
        for (passed, tag, message) in case.outcomes:
            short_tag = tag.split('/')[-1]
            message = html_tools.escape(message)
            lines = message.splitlines()
            lines[0] = lines[0][:2] + lines[0].split('/')[-1]
            message = html_tools.wrap_text_with_indentation(
                '\n'.join(lines)
            )
            items.append(f"✗ {short_tag}<br><pre>{message}</pre>")
    return html_tools.build_list(items)
```
Creates an HTML list out of test case objects.
```python
class ChecksSucceed(CasesTest):
```
A test-case checker which ensures that each recorded outcome for each established test case in the submitted testing module is a success.
Note that when this goal is checked during validation, tests in the "validation_test_cases" slot have been run against the solution code, whereas when this goal is used during evaluation, those same test cases have been run against the student's submitted code.
TODO: Manage multi-file submission and/or test file copying so that "validation_test_cases" is actually available during evaluation.
```python
    def __init__(self, taskid, **kwargs):
        try:
            import optimism  # noqa F401
        except Exception:
            raise NotImplementedError(
                "ChecksSucceed cannot be used because the 'optimism'"
                " module cannot be imported."
            )

        if "description" not in kwargs:
            kwargs["description"] = (
                (
                    "All checks must succeed"
                ),
                (
                    "Every time your code checks a test case using the"
                    " <code>optimism</code> module the check must"
                    " succeed."
                )
            )

        super().__init__(taskid, "checks_succeeded", **kwargs)
```
A task ID is required. Arguments are passed through to `CasesTest`.
The identifier will be "checks_succeeded".
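A usage sketch (the task ID is hypothetical); this goal complements `DefinesEnoughTests` when both requirements apply:

```python
from potluck.validation import ChecksSucceed

goal = ChecksSucceed("taskA")
```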
```python
    def check(self, context):
        cases = context_utils.extract(context, "validation_test_cases")
        any_failed = False
        any_passed = False
        failing = []
        for case in cases:
            failed_here = False
            for (succeeded, tag, msg) in case.outcomes:
                if succeeded:
                    any_passed = True
                else:
                    failed_here = True

            if failed_here:
                any_failed = True
                failing.append(case)

        if any_failed:
            fail_list = list_case_outcomes(failing)
            if any_passed:
                return {
                    "status": "partial",
                    "explanation": (
                        "Some of your code's checks failed:\n"
                    ) + fail_list
                }
            else:
                return {
                    "status": "failed",
                    "explanation": (
                        "None of your code's checks succeeded:\n"
                    ) + fail_list
                }
        else:
            if any_passed:
                return {
                    "status": "accomplished",
                    "explanation": (
                        "All of your code's checks succeeded."
                    )
                }
            else:
                return {
                    "status": "failed",
                    "explanation": (
                        "Your code did not check any test cases."
                    )
                }
```
Looks for any failed outcomes in test cases within the given context.
```python
def check_tests_harness(
    function,
    *args,
    _req_cases=None,
    _must_pass=True,
    **kwargs
):
    # Check if optimism is available
    try:
        import optimism  # noqa F401
    except Exception:
        raise NotImplementedError(
            "check_tests_harness cannot be used because the"
            " 'optimism' module cannot be imported."
        )

    # First clean up any existing tests
    optimism.deleteAllTestSuites()

    # Run the function, ignoring its result
    function(*args, **kwargs)

    # List all currently defined test cases (i.e., those defined by the
    # function we're looking at)
    defined = optimism.listAllTrials()

    report = ""

    # Check each defined case and create a map of the number of passing
    # and failing cases for each function/file/block tested; as a side
    # effect add lines to the report detailing any failing cases if
    # _must_pass is set to "all".
    caseMap = {}
    for case in defined:
        # Figure out the case ID
        if isinstance(case.manager, optimism.FunctionManager):
            case_id = case.manager.target.__name__
            show_case_id = "function:" + case_id
        elif isinstance(case.manager, optimism.FileManager):
            case_id = "file:" + case.manager.target
            show_case_id = case_id
        elif isinstance(case.manager, optimism.BlockManager):
            case_id = "block:" + case.manager.target
            show_case_id = "block:" + repr(case.manager.target)
        else:
            case_id = None
            show_case_id = "unknown"

        caseMap.setdefault(case_id, [show_case_id, 0, 0])

        # Go through each outcome
        n_failed = 0
        n_checks = 0
        for passed, _, _ in case.outcomes:
            n_checks += 1
            if not passed:
                n_failed += 1

        if n_checks > 0 and n_failed == 0:
            # All checks passed, and there was at least one
            # This counts as a passing case
            caseMap[case_id][1] += 1

        elif n_failed > 0:
            # some checks failed
            # Record the failure
            caseMap[case_id][2] += 1
            if _must_pass == "all":
                # Note failure in our report, but don't include specific
                # line numbers, since those might differ between
                # submitted and solution files
                report += (
                    f"{n_failed} checks failed for test(s) of"
                    f" {show_case_id}\n"
                )

    # Check that the required number of cases are present
    if _req_cases is None:
        # Report on every defined test
        for (case_id, (show_case_id, succeeded, failed)) in caseMap.items():
            # Skip cases where no checks were performed
            if succeeded + failed == 0:
                continue

            if _must_pass is True and succeeded == 0:
                # if _must_pass is 'all' we've already reported failures
                report += (
                    f"{failed} {phrasing.plural(failed, 'check')} failed"
                    f" for test(s) of {show_case_id}\n"
                )
            elif _must_pass:
                # report success
                report += (
                    f"At least one check succeeded for test(s) of"
                    f" {show_case_id}\n"
                )
            else:
                # must_pass must be False, so we just report that checks
                # were defined regardless of success/failure
                report += (
                    f"Performed at least one check for test(s) of"
                    f" {show_case_id}\n"
                )
    else:
        # Just report on required tests
        for req, threshold in _req_cases.items():
            show_case_id, succeeded, failed = caseMap.get(
                req,
                [repr(req), 0, 0]
            )  # TODO: More elegant here?
            if _must_pass:
                if succeeded >= threshold:
                    cases_passed = phrasing.plural(
                        threshold,
                        'case passed',
                        'cases passed'
                    )
                    report += (
                        f"At least {threshold} {cases_passed} for"
                        f" test(s) of {show_case_id}\n"
                    )
                else:
                    cases_passed = phrasing.plural(
                        succeeded,
                        'case passed',
                        'cases passed'
                    )
                    total = succeeded + failed
                    if total == succeeded:
                        cases_passed = phrasing.plural(
                            total,
                            'case was defined',
                            'cases were defined'
                        )
                    only = "Only " if succeeded > 0 else ""
                    out_of = f"/{total}" if total > succeeded else ""
                    report += (
                        f"{only}{succeeded}{out_of} {cases_passed} for"
                        f" test(s) of {show_case_id} ({threshold} were"
                        f" required)\n"
                    )
            else:
                if succeeded + failed >= threshold:
                    cases_were = phrasing.plural(
                        threshold,
                        'case was',
                        'cases were'
                    )
                    report += (
                        f"At least {threshold} {cases_were} defined"
                        f" for {show_case_id}\n"
                    )
                else:
                    total = succeeded + failed
                    cases_were = phrasing.plural(
                        total,
                        'case was',
                        'cases were'
                    )
                    only = "Only " if total > 0 else ""
                    report += (
                        f"{only}{total} {cases_were} defined for"
                        f" {show_case_id} ({threshold}"
                        f" {phrasing.plural(threshold, 'was', 'were')}"
                        f" required)\n"
                    )

    # We return our report, to be compared with the same report when run
    # against the solution code
    return report
```
A test harness (to be used with `potluck.specifications.test_with_harness`) which will return a string reporting on the aggregate behavior of `optimism` tests that were defined and checked as a result of running a particular function. A minimum number of distinct `optimism` test cases can be required for each of certain target functions, and those test cases can be required to pass all checks applied (this second requirement can be skipped by setting `_must_pass` to `False`).

If `_must_pass` is set to the string "all", then all tests must pass, even if more than the required number of tests are defined; otherwise enough tests must pass (i.e., have been checked at least once and have succeeded on every check applied) to meet the minimum requirements, but cases beyond those are allowed to fail. If `_must_pass` is set to the string "not all", then at least one test must fail, but the specific number of successes/failures is not reported.
Note that this function has a side effect of deleting all previously-defined optimism tests.
The `_req_cases` argument must be a dictionary mapping function names to integers specifying how many distinct tests are required for that function. Tests for files can be required by prepending 'file:' to the filename, and code block tests can be required by prepending 'block:' to the exact code block string (but that's quite fragile). If `_req_cases` is None (the default) then the report will include information on all defined tests.

As a harness function, most arguments are passed through to whatever function is being tested; if that function has arguments named `_req_cases` and/or `_must_pass`, you'll have to define your own custom harness that uses different keyword argument names. Because positional arguments are passed through, these two meta-parameters must be given as keyword arguments.

Note that technically, if the solution code has failing test cases and `_must_pass` is set to "all", the reports produced will be the same if the submitted code fails the same number of test cases.
(Note: these docstring paragraphs will be used as the default goal description...)
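For illustration, a hedged sketch of calling the harness directly; the submitted function, the requirement key, and the count below are hypothetical, and wiring this up through `potluck.specifications.test_with_harness` would use the same keyword arguments:

```python
import optimism as opt

from potluck.validation import check_tests_harness


def run_my_tests():
    # Hypothetical stand-in for a submitted function that defines and checks
    # optimism cases when it runs (assuming optimism's testFunction/case API).
    tester = opt.testFunction(len)
    tester.case("abc").checkReturnValue(3)
    tester.case("hello").checkReturnValue(5)


report = check_tests_harness(
    run_my_tests,
    _req_cases={"len": 2},   # require two distinct cases for the function 'len'
    _must_pass=True
)
print(report)   # compared against the same report generated from the solution
```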
```python
def tests_report_description(target_fn, _req_cases=None, _must_pass=True):
    if _req_cases is None:
        if _must_pass == "all":
            return (
                (
                    "Must define and successfully check"
                    " <code>optimism</code> test cases for the correct"
                    " functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. The number of test cases that fail at"
                    " least one check must match the solution results"
                    " (usually this means no check should fail)."
                )
            )
        elif _must_pass is True:
            return (
                (
                    "Must define and check <code>optimism</code> test"
                    " cases for the correct functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. At least one check must succeed for"
                    " each test case defined by the solution code."
                )
            )
        else:
            return (
                (
                    "Must define and check <code>optimism</code> test"
                    " cases for the correct functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. It does not matter if the checks"
                    " succeed or fail as long as at least one check is"
                    " performed per test case."
                )
            )
    else:
        # Build a list of strings describing per-case-id requirements
        checklist = []
        for req, threshold in _req_cases.items():
            if req.startswith('block:'):
                show_case = (
                    f"the code block <pre><code>{req[6:]}</code></pre>"
                )
            elif req.startswith('file:'):
                show_case = f"the file '{req[5:]}'"
            else:
                show_case = f"the function {req}"

            if _must_pass:
                checklist.append(
                    f"All checks must pass for at least {threshold}"
                    f" test {phrasing.plural(threshold, 'case')} for"
                    f" {show_case}."
                )
            else:
                checklist.append(
                    f"At least {threshold} test"
                    f" {phrasing.plural(threshold, 'case')} for"
                    f" {show_case} must be defined, and each must"
                    f" include at least one check (which does not have"
                    f" to succeed)."
                )

        # Construct detail text
        details = ""
        if checklist:
            details += (
                f"The following test case(s) must be established by your"
                f" <code>{target_fn}</code> function and/or must"
                f" succeed:"
            )
            details += html_tools.build_list(checklist)

        elif _must_pass != "all":
            # If there are no listed checks, but _req_cases is not None,
            # you'll need to craft a custom description yourself
            raise ValueError(
                "_req_cases did not include any required test cases. You"
                " should fix that or use a custom description."
            )

        if _must_pass == "all":
            details += (
                "The same number of checks (usually zero) must fail for"
                " the same test cases as the solution code."
            )

        return (
            (
                f"Your <code>{target_fn}</code> function must establish"
                f" the correct test cases."
            ),
            details
        )
```
Returns a goal description tuple suitable for use with `specifications.HasGoal.set_goal_description` when `test_with_harness` has been used to set up `check_tests_harness` as the testing harness. Pass the same target function and keyword arguments used with the test harness (i.e., which were included in the test case).
TODO: Option for generic version when multiple test cases are grouped?
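A matching sketch for the goal description; the arguments mirror the (hypothetical) harness call above, and the resulting tuple is intended for `specifications.HasGoal.set_goal_description` as described:

```python
from potluck.validation import tests_report_description

# Use the same target function name and keyword arguments that were used
# with check_tests_harness (names and counts are hypothetical).
title, details = tests_report_description(
    "run_my_tests",
    _req_cases={"len": 2},
    _must_pass=True
)
```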