potluck.validation
Machinery for defining requirements for tests. Tests are submitted in a
separate file using the `optimism` library; we can require a certain
number of distinct test cases that target specific functions/files, and
require that all of the checks succeed.

The validation machinery runs the submitted tests file in a directory
with the solution code and records which test cases it checks and whether
those checks succeed. `rubrics.Rubric.validate_tests` can then be used
to generate a report based on all validation goals; the goals in this
file should normally be used as validation goals, not evaluation goals.
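For illustration, a minimal sketch of what a submitted tests file might look like, assuming `optimism`'s `testFunction`/`case`/`checkReturnValue` interface (the module name `solution`, the function `process`, and the expected values are hypothetical):

```python
# tests.py -- sketch only; names and expected values are hypothetical
import optimism as opt

from solution import process   # hypothetical function under test

tester = opt.testFunction(process)   # test manager targeting 'process'

case = tester.case(3)                # establish a distinct test case
case.checkReturnValue(9)             # record a check for this case

case = tester.case(5)                # a second distinct case (different arguments)
case.checkReturnValue(25)
```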
1""" 2Machinery for defining requirements for tests. Tests are submitted in a 3separate file using the `optimism` library, and we can require a certain 4number of distinct test cases that target specific functions/files, and 5require that all of the checks succeed. 6 7The validation machinery runs the submitted tests file in a directory 8with the solution code and checks what test cases it checks and whether 9those checks succeed. `rubrics.Rubric.validate_tests` can then be used 10to generate a report based on all validation goals; the goals in this 11file should normally be used as validation goals, not evaluation goals. 12""" 13 14from . import rubrics 15from . import contexts 16from . import context_utils 17from . import phrasing 18from . import html_tools 19 20 21#--------------------------------------------------# 22# Goal subtypes for checking file-level test cases # 23#--------------------------------------------------# 24 25class CasesTest(rubrics.Goal): 26 """ 27 Runs a function against the auto-context for "validation_test_cases". 28 Inherit and override the `check` method with a function that accepts 29 a context and returns a goal evaluation result to define your test. 30 31 Note that these can only be used when the 'optimism' module is 32 available. 33 """ 34 def check(self, context): 35 """ 36 Not implemented; override to define specific tests. 37 """ 38 raise NotImplementedError( 39 "CasesTest is an abstract class that can't be used" 40 " directly." 41 ) 42 43 def __init__( 44 self, 45 taskid, 46 identifier, 47 description=( 48 "BLANK EXPECTATIONS TEST", 49 "THIS GOAL HAS NOT BEEN DEFINED" 50 ), 51 goal_type="testing", 52 uses_slots=("validation_test_cases",), 53 **kwargs 54 ): 55 """ 56 In addition to a task ID, an identifier, and a description, a 57 goal type may be supplied other than the default "testing". 58 59 The categorizer "tests:" will be prepended to the given 60 identifier. 61 62 The slots required should be given as uses_slots, and a relevant 63 context will be selected or created as the testing context. By 64 default the "validation_test_cases" slot is the only one used. 65 66 Any extra arguments are passed through to the `rubrics.Goal` 67 constructor. 68 """ 69 # Auto context dependency based on uses_slots 70 depends = contexts.auto(*uses_slots) 71 if len(depends) == 1: 72 test_context = depends[0] 73 else: 74 # TODO: De-duplicate stuff where one context actually 75 # provides everything needed via inheritance but auto 76 # doesn't see that? 77 test_context = contexts.Context( 78 description=( 79 "Test cases defined by your code", 80 ( 81 "The " + phrasing.comma_list( 82 slot.replace("_", " ") 83 for slot in uses_slots 84 ) 85 + " of your code." 86 ) 87 ), 88 builder=lambda ctx: ctx, 89 depends=depends 90 ) 91 92 if "test_in" not in kwargs: 93 kwargs["test_in"] = {} 94 if "contexts" not in kwargs["test_in"]: 95 kwargs["test_in"]["contexts"] = [ test_context ] 96 97 # Specified goal type 98 if "tags" not in kwargs: 99 kwargs["tags"] = {} 100 kwargs["tags"]["goal_type"] = goal_type 101 102 # Set up rubrics.Goal stuff 103 super().__init__( 104 taskid, 105 "tests:" + identifier, 106 description, 107 **kwargs 108 ) 109 110 # subgoals is inherited (no subgoals) 111 112 # table is inherited 113 114 def evaluate_in_context(self, context=None): 115 """ 116 Runs the checker and returns its result. 
117 """ 118 context = context or {} 119 120 try: 121 self.result = self.check(context) 122 123 if self.result is None: 124 raise ValueError( 125 f"Test case check for {self.__class__.__name__}" 126 f" returned None!" 127 ) 128 except Exception: 129 self.result = { 130 "status": "failed", 131 "traceback": html_tools.html_traceback( 132 linkable=context_utils.linkmap(context) 133 ) 134 } 135 self.set_explanation( 136 context, 137 status="crash", 138 default=html_tools.html_traceback( 139 title="Error while checking your test cases.", 140 linkable=context_utils.linkmap(context) 141 ) 142 ) 143 return self.result 144 145 self.set_explanation( 146 context, 147 default=self.result["explanation"] 148 ) 149 150 return self.result 151 152 153class DefinesEnoughTests(CasesTest): 154 """ 155 A test cases checker which ensures that for each of certain listed 156 functions (or files), a certain number of distinct test cases are 157 established (using the `optimism` module). 158 159 Note that functions are specified by name to be matched against 160 __name__ attributes of actual functions checked, so if you're testing 161 methods you just use the method name, and testing decorated functions 162 may be tricky. (TODO: Check if this plays nicely with spec-specified 163 decorations.) 164 165 Test cases are counted as distinct if either their arguments or their 166 provided inputs differ. 167 """ 168 def __init__(self, taskid, function_reqs, file_reqs, **kwargs): 169 """ 170 A task ID is required. The other required arguments are two 171 dictionaries mapping function name strings and then filename 172 strings to integers specifying how many tests are required. 173 174 Other arguments get passed through to `CasesTest` and 175 potentially thence to `rubrics.Goal`. 176 177 The identifier will be "defines_enough". 178 """ 179 self.function_reqs = function_reqs 180 self.file_reqs = file_reqs 181 182 # Check types for function requirements keys and values 183 for fname in function_reqs: 184 if not isinstance(fname, str): 185 raise TypeError( 186 ( 187 "Each function requirement must be a string." 188 " (You used {} as a key, which is a {})." 189 ).format( 190 repr(fname), 191 type(fname) 192 ) 193 ) 194 195 val = function_reqs[fname] 196 if not isinstance(val, int): 197 raise TypeError( 198 ( 199 "Each function requirement must use an integer" 200 " as the value. (requirement with key {} had" 201 " value {} which is a {})." 202 ).format( 203 repr(fname), 204 repr(val), 205 type(val) 206 ) 207 ) 208 209 # Check types for file requirements keys and values 210 for filename in file_reqs: 211 if not isinstance(filename, str): 212 raise TypeError( 213 ( 214 "Each file requirement must be a string." 215 " (You used {} as a key, which is a {})." 216 ).format( 217 repr(filename), 218 type(filename) 219 ) 220 ) 221 222 val = file_reqs[filename] 223 if not isinstance(val, int): 224 raise TypeError( 225 ( 226 "Each file requirement must use an integer as" 227 " the value. (requirement with key {} had" 228 " value {} which is a {})." 229 ).format( 230 repr(filename), 231 repr(val), 232 type(val) 233 ) 234 ) 235 236 # Check if optimism is available 237 try: 238 import optimism # noqa F401 239 except Exception: 240 raise NotImplementedError( 241 "DefinesEnoughTests cannot be used because the" 242 " 'optimism' module cannot be imported." 
243 ) 244 245 # Set automatic description 246 if "description" not in kwargs: 247 rlist = [ 248 "Function <code>{}</code>: {} cases".format( 249 fn, 250 required 251 ) 252 for fn, required in self.function_reqs.items() 253 ] + [ 254 "File '{}': {} cases".format( 255 filename, 256 required 257 ) 258 for filename, required in self.file_reqs.items() 259 ] 260 kwargs["description"] = ( 261 "Defines required test cases", 262 ( 263 """\ 264Your code must use the <code>optimism</code> module to create a certain 265number of test cases which use the following functions/files. Test cases 266that are the same as each other (same arguments and/or inputs) don't 267count. (Each test case must include at least one check).\n""" 268 + html_tools.build_list(rlist) 269 ) 270 ) 271 272 super().__init__(taskid, "defines_enough", **kwargs) 273 274 def check(self, context): 275 """ 276 Looks for an adequate number of established test cases in the 277 given context that have recorded checks. 278 """ 279 try: 280 import optimism 281 except Exception: 282 raise NotImplementedError( 283 "Cannot check for test cases because optimism cannot be" 284 " imported." 285 ) 286 cases = context_utils.extract(context, "validation_test_cases") 287 by_fn = {} 288 by_file = {} 289 for case in cases: 290 # Skip test cases that have not been checked 291 if len(case.outcomes) == 0: 292 continue 293 294 # Categorize by function/file tested 295 if issubclass(case.manager.case_type, optimism.FunctionCase): 296 fname = case.manager.target.__name__ 297 add_to = by_fn.setdefault(fname, []) 298 299 # Don't record duplicate cases 300 duplicate = False 301 for recorded in add_to: 302 if ( 303 case.args == recorded.args 304 and case.kwargs == recorded.kwargs 305 and case.inputs == recorded.inputs 306 ): 307 duplicate = True 308 break 309 310 # Record this case 311 if not duplicate: 312 add_to.append(case) 313 314 elif issubclass(case.manager.case_type, optimism.FileCase): 315 add_to = by_file.setdefault(case.manager.target, []) 316 317 # Don't record duplicate cases 318 duplicate = False 319 for recorded in add_to: 320 if ( 321 case.args == recorded.args 322 and case.kwargs == recorded.kwargs 323 and case.inputs == recorded.inputs 324 ): 325 duplicate = True 326 break 327 328 # Record this case 329 if not duplicate: 330 add_to.append(case) 331 332 # Note that we ignore other kinds of cases including block 333 # cases, which would be hard to count/require... 334 335 any_tests = False 336 deficient = False 337 reports = [] 338 for req_file, required in self.file_reqs.items(): 339 cases = by_file.get(req_file, []) 340 count = len(cases) 341 342 if count > 0: 343 any_tests = True 344 345 if count < required: 346 deficient = True 347 symbol = '✗' 348 else: 349 symbol = '✓' 350 351 reports.append( 352 f"{symbol} '{req_file}': {count} / {required}" 353 ) 354 355 for req_fn, required in self.function_reqs.items(): 356 cases = by_fn.get(req_fn, []) 357 count = len(cases) 358 359 if count > 0: 360 any_tests = True 361 362 if count < required: 363 deficient = True 364 symbol = '✗' 365 else: 366 symbol = '✓' 367 368 reports.append( 369 f"{symbol} <code>{req_fn}</code>: {count} / {required}" 370 ) 371 372 if not any_tests: 373 return { 374 "status": "failed", 375 "explanation": ( 376 "Running your module did not establish any test" 377 " cases for required functions or files." 
378 ) 379 } 380 elif deficient: 381 return { 382 "status": "partial", 383 "explanation": ( 384 "Your module did not establish as many test cases as" 385 " were required for all functions/files:\n" 386 ) + html_tools.build_list(reports) 387 } 388 else: 389 return { 390 "status": "accomplished", 391 "explanation": ( 392 "Your module established enough test cases for each" 393 " function or file it was required to test." 394 ) 395 } 396 397 398def list_case_outcomes(cases): 399 """ 400 Creates an HTML list out of test case objects. 401 """ 402 items = [] 403 for case in cases: 404 for (passed, tag, message) in case.outcomes: 405 short_tag = tag.split('/')[-1] 406 message = html_tools.escape(message) 407 lines = message.splitlines() 408 lines[0] = lines[0][:2] + lines[0].split('/')[-1] 409 message = html_tools.wrap_text_with_indentation( 410 '\n'.join(lines) 411 ) 412 items.append(f"✗ {short_tag}<br><pre>{message}</pre>") 413 return html_tools.build_list(items) 414 415 416class ChecksSucceed(CasesTest): 417 """ 418 An test case checker which ensures that each recorded outcome for 419 each established test case in the submitted testing module is a 420 success. 421 422 Note that when this goal is checked during validation, tests in the 423 "validation_test_cases" slot have been run against the solution 424 code, whereas when this goal is used during evaluation, those same 425 test cases have been run against the student's submitted code. 426 427 TODO: Manage multi-file submission and/or test file copying so that 428 "validation_test_cases" is actually available during evaluation. 429 """ 430 def __init__(self, taskid, **kwargs): 431 """ 432 A task ID is required. Arguments are passed through to 433 `CasesTest`. 434 435 The identifier will be "checks_succeeded". 436 """ 437 438 try: 439 import optimism # noqa F401 440 except Exception: 441 raise NotImplementedError( 442 "ChecksSucceed cannot be used because the 'optimism'" 443 " module cannot be imported." 444 ) 445 446 if "description" not in kwargs: 447 kwargs["description"] = ( 448 ( 449 "All checks must succeed" 450 ), 451 ( 452 "Every time your code checks a test case using the" 453 " <code>optimism</code> module the check must" 454 " succeed." 455 ) 456 ) 457 458 super().__init__(taskid, "checks_succeeded", **kwargs) 459 460 def check(self, context): 461 """ 462 Looks for any failed outcomes in test cases within the given 463 context. 464 """ 465 cases = context_utils.extract(context, "validation_test_cases") 466 any_failed = False 467 any_passed = False 468 failing = [] 469 for case in cases: 470 failed_here = False 471 for (succeeded, tag, msg) in case.outcomes: 472 if succeeded: 473 any_passed = True 474 else: 475 failed_here = True 476 477 if failed_here: 478 any_failed = True 479 failing.append(case) 480 481 if any_failed: 482 fail_list = list_case_outcomes(failing) 483 if any_passed: 484 return { 485 "status": "partial", 486 "explanation": ( 487 "Some of your code's checks failed:\n" 488 ) + fail_list 489 } 490 else: 491 return { 492 "status": "failed", 493 "explanation": ( 494 "None of your code's checks succeeded:\n" 495 ) + fail_list 496 } 497 else: 498 if any_passed: 499 return { 500 "status": "accomplished", 501 "explanation": ( 502 "All of your code's checks succeeded." 503 ) 504 } 505 else: 506 return { 507 "status": "failed", 508 "explanation": ( 509 "Your code did not check any test cases." 
510 ) 511 } 512 513 514#--------------------------------------------------# 515# Harnesses for checking function-level test cases # 516#--------------------------------------------------# 517 518def check_tests_harness( 519 function, 520 *args, 521 _req_cases=None, 522 _must_pass=True, 523 **kwargs 524): 525 """ 526 A test harness (to be used with 527 `potluck.specifications.test_with_harness`) which will return a 528 string reporting on the aggregate behavior of `optimism` tests that 529 were defined and checked as a result of running a particular 530 function. A minimum number of distinct `optimism` tests cases can be 531 required for each of certain target functions, and that those test 532 cases must pass all checks applied (this second check can be skipped 533 by setting `_must_pass` to `False`). 534 535 If `_must_pass` is set to the string "all", then all tests must 536 pass, even if more than the required number of tests are defined, 537 otherwise enough tests must pass (i.e., have been checked at least 538 once and have succeeded on every check applied) to meet the minimum 539 requirements, but cases beyond those are allowed to fail. If 540 `_must_pass` is set to the string "not all" then at least one test 541 must fail, but the specific number of successes/failures is not 542 reported. 543 544 Note that this function has a side effect of deleting all 545 previously-defined optimism tests. 546 547 The `_req_cases` argument must be a dictionary mapping function names 548 to integers specifying how many distinct tests are required for that 549 function. Tests for files can be required by prepending 'file:' to 550 the filename to require tests for, and code block tests can be 551 required by prepending 'block:' to the exact code block string (but 552 that's quite fragile). If `_req_cases` is None (the default) then 553 the report will include information on all defined tests. 554 555 As a harness function, most arguments are passed through to whatever 556 function is being tested; if that function has arguments named 557 `_req_cases` and/or `_must_pass` you'll have to define your own 558 custom harness that uses different keyword argument names. Because 559 positional arguments are passed through, these two meta-parameters 560 must be given as keyword arguments. 561 562 Note that technically, if the solution code has failing test cases, 563 when `_must_pass` is set to "all" the reports produced will be the 564 same if the submitted code fails the same number of test cases. 565 566 (Note: these docstring paragraphs will be used as the default goal 567 description...) 568 """ 569 # Check if optimism is available 570 try: 571 import optimism # noqa F401 572 except Exception: 573 raise NotImplementedError( 574 "check_tests_harness cannot be used because the" 575 " 'optimism' module cannot be imported." 576 ) 577 578 # First clean up any existing tests 579 optimism.deleteAllTestSuites() 580 581 # Run the function, ignoring its result 582 function(*args, **kwargs) 583 584 # List all currently defined test cases (i.e., those defined by the 585 # function we're looking at) 586 defined = optimism.listAllTrials() 587 588 report = "" 589 590 # Check each defined case and create a map of the number of passing 591 # and failing cases for each function/file/block tested; as a side 592 # effect add lines to the report detailing any failing cases if 593 # _must_pass is set to "all". 
594 caseMap = {} 595 for case in defined: 596 # Figure out the case ID 597 if isinstance(case.manager, optimism.FunctionManager): 598 case_id = case.manager.target.__name__ 599 show_case_id = "function:" + case_id 600 elif isinstance(case.manager, optimism.FileManager): 601 case_id = "file:" + case.manager.target 602 show_case_id = case_id 603 elif isinstance(case.manager, optimism.BlockManager): 604 case_id = "block:" + case.manager.target 605 show_case_id = "block:" + repr(case.manager.target) 606 else: 607 case_id = None 608 show_case_id = "unknown" 609 610 caseMap.setdefault(case_id, [show_case_id, 0, 0]) 611 612 # Go through each outcome 613 n_failed = 0 614 n_checks = 0 615 for passed, _, _ in case.outcomes: 616 n_checks += 1 617 if not passed: 618 n_failed += 1 619 620 if n_checks > 0 and n_failed == 0: 621 # All checks passed, and there was at least one 622 # This counts as a passing case 623 caseMap[case_id][1] += 1 624 625 elif n_failed > 0: 626 # some checks failed 627 # Record the failure 628 caseMap[case_id][2] += 1 629 if _must_pass == "all": 630 # Note failure in our report, but don't include specific 631 # line numbers, since those might differ between 632 # submitted and solution files 633 report += ( 634 f"{n_failed} checks failed for test(s) of" 635 f" {show_case_id}\n" 636 ) 637 638 # Check that the required number cases are present 639 if _req_cases is None: 640 # Report on every defined test 641 for (case_id, (show_case_id, succeeded, failed)) in caseMap.items(): 642 # Skip cases where no checks were performed 643 if succeeded + failed == 0: 644 continue 645 646 if _must_pass is True and succeeded == 0: 647 # if _must_pass is 'all' we've already reported failures 648 report += ( 649 f"{failed} {phrasing.plural(failed, 'check')} failed" 650 f" for test(s) of {show_case_id}\n" 651 ) 652 elif _must_pass: 653 # report success 654 report += ( 655 f"At least one check succeeded for test(s) of" 656 f" {show_case_id}\n" 657 ) 658 else: 659 # must_pass must be False, so we just report that checks 660 # were defined regardless of success/failure 661 report += ( 662 f"Performed at least one check for test(s) of" 663 f" {show_case_id}\n" 664 ) 665 else: 666 # Just report on required tests 667 for req, threshold in _req_cases.items(): 668 show_case_id, succeeded, failed = caseMap.get( 669 req, 670 [repr(req), 0, 0] 671 ) # TODO: More elegant here? 
672 if _must_pass: 673 if succeeded >= threshold: 674 cases_passed = phrasing.plural( 675 threshold, 676 'case passed', 677 'cases passed' 678 ) 679 report += ( 680 f"At least {threshold} {cases_passed} for" 681 f" test(s) of {show_case_id}\n" 682 ) 683 else: 684 cases_passed = phrasing.plural( 685 succeeded, 686 'case passed', 687 'cases passed' 688 ) 689 total = succeeded + failed 690 if total == succeeded: 691 cases_passed = phrasing.plural( 692 total, 693 'case was defined', 694 'cases were defined' 695 ) 696 only = "Only " if succeeded > 0 else "" 697 out_of = f"/{total}" if total > succeeded else "" 698 report += ( 699 f"{only}{succeeded}{out_of} {cases_passed} for" 700 f" test(s) of {show_case_id} ({threshold} were" 701 f" required)\n" 702 ) 703 else: 704 if succeeded + failed >= threshold: 705 cases_were = phrasing.plural( 706 threshold, 707 'case was', 708 'cases were' 709 ) 710 report += ( 711 f"At least {threshold} {cases_were} defined" 712 f" for {show_case_id}\n" 713 ) 714 else: 715 total = succeeded + failed 716 cases_were = phrasing.plural( 717 total, 718 'case was', 719 'cases were' 720 ) 721 only = "Only " if total > 0 else "" 722 report += ( 723 f"{only}{total} {cases_were} defined for" 724 f" {show_case_id} ({threshold}" 725 f" {phrasing.plural(threshold, 'was', 'were')}" 726 f" required)\n" 727 ) 728 729 # We return our report, to be compared with the same report when run 730 # against the solution code 731 return report 732 733 734def tests_report_description(target_fn, _req_cases=None, _must_pass=True): 735 """ 736 Returns a goal description tuple suitable for use with 737 `specifications.HasGoal.set_goal_description` when 738 `test_with_harness` has been used to set up `check_tests_harness` as 739 the testing harness. Pass the same target function and keyword 740 arguments used with the test harness (i.e., which were included in 741 the test case). 742 743 TODO: Option for generic version when multiple test cases are grouped? 744 """ 745 if _req_cases is None: 746 if _must_pass == "all": 747 return ( 748 ( 749 "Must define and successfully check" 750 " <code>optimism</code> test cases for the correct" 751 " functions." 752 ), 753 ( 754 "Your code must define and check" 755 " <code>optimism</code> test cases for each" 756 " function, file, or code block that the solution" 757 " code does. The number of test cases that fail at" 758 " least one check must match the solution results" 759 " (usually this means no check should fail)." 760 ) 761 ) 762 elif _must_pass is True: 763 return ( 764 ( 765 "Must define and check <code>optimism</code> test" 766 " cases for the correct functions." 767 ), 768 ( 769 "Your code must define and check" 770 " <code>optimism</code> test cases for each" 771 " function, file, or code block that the solution" 772 " code does. At least one check must succeed for" 773 " each test case defined by the solution code." 774 ) 775 ) 776 else: 777 return ( 778 ( 779 "Must define and check <code>optimism</code> test" 780 " cases for the correct functions." 781 ), 782 ( 783 "Your code must define and check" 784 " <code>optimism</code> test cases for each" 785 " function, file, or code block that the solution" 786 " code does. It does not matter if the checks" 787 " succeed or fail as long as at least one check is" 788 " performed per test case." 
789 ) 790 ) 791 else: 792 # Build a list of strings describing per-case-id requirements 793 checklist = [] 794 for req, threshold in _req_cases.items(): 795 if req.startswith('block:'): 796 show_case = ( 797 f"the code block <pre><code>{req[6:]}</code></pre>" 798 ) 799 elif req.startswith('file:'): 800 show_case = f"the file '{req[5:]}'" 801 else: 802 show_case = f"the function {req}" 803 804 if _must_pass: 805 checklist.append( 806 f"All checks must pass for at least {threshold}" 807 f" test {phrasing.plural(threshold, 'case')} for" 808 f" {show_case}." 809 ) 810 else: 811 checklist.append( 812 f"At least {threshold} test" 813 f" {phrasing.plural(threshold, 'case')} for" 814 f" {show_case} must be defined, and each must" 815 f" include at least one check (which does not have" 816 f" to succeed)." 817 ) 818 819 # Construct detail text 820 details = "" 821 if checklist: 822 details += ( 823 f"The following test case(s) must be established by your" 824 f" <code>{target_fn}</code> function and/or must" 825 f" succeed:" 826 ) 827 details += html_tools.build_list(checklist) 828 829 elif _must_pass != "all": 830 # If there are no listed checks, but _req_cases is not None, 831 # you'll need to craft a custom description yourself 832 raise ValueError( 833 "_req_cases did not include any required test cases. You" 834 " should fix that or use a custom description." 835 ) 836 837 if _must_pass == "all": 838 details += ( 839 "The same number of checks (usually zero) must fail for" 840 " the same test cases as the solution code." 841 ) 842 843 return ( 844 ( 845 f"Your <code>{target_fn}</code> function must establish" 846 f" the correct test cases." 847 ), 848 details 849 )
```python
class CasesTest(rubrics.Goal):
```
Runs a function against the auto-context for "validation_test_cases". Inherit and override the `check` method with a function that accepts a context and returns a goal evaluation result to define your test.
Note that these can only be used when the 'optimism' module is available.
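For illustration, a minimal subclass sketch; the goal name, identifier, and messages are hypothetical, and it relies only on the constructor arguments and result format documented below:

```python
from potluck import context_utils
from potluck.validation import CasesTest


class HasAnyCases(CasesTest):
    """
    Hypothetical goal: accomplished if the submitted tests file
    established at least one test case.
    """
    def __init__(self, taskid, **kwargs):
        super().__init__(
            taskid,
            "has_any_cases",   # identifier ("tests:" will be prepended)
            description=(
                "Defines at least one test case",
                "Your code must establish at least one test case."
            ),
            **kwargs
        )

    def check(self, context):
        # The default "validation_test_cases" slot supplies the cases.
        cases = context_utils.extract(context, "validation_test_cases")
        if cases:
            return {
                "status": "accomplished",
                "explanation": "Your code established at least one test case."
            }
        else:
            return {
                "status": "failed",
                "explanation": "Your code did not establish any test cases."
            }
```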
```python
    def __init__(
        self,
        taskid,
        identifier,
        description=(
            "BLANK EXPECTATIONS TEST",
            "THIS GOAL HAS NOT BEEN DEFINED"
        ),
        goal_type="testing",
        uses_slots=("validation_test_cases",),
        **kwargs
    ):
        # Auto context dependency based on uses_slots
        depends = contexts.auto(*uses_slots)
        if len(depends) == 1:
            test_context = depends[0]
        else:
            # TODO: De-duplicate stuff where one context actually
            # provides everything needed via inheritance but auto
            # doesn't see that?
            test_context = contexts.Context(
                description=(
                    "Test cases defined by your code",
                    (
                        "The " + phrasing.comma_list(
                            slot.replace("_", " ")
                            for slot in uses_slots
                        )
                        + " of your code."
                    )
                ),
                builder=lambda ctx: ctx,
                depends=depends
            )

        if "test_in" not in kwargs:
            kwargs["test_in"] = {}
        if "contexts" not in kwargs["test_in"]:
            kwargs["test_in"]["contexts"] = [ test_context ]

        # Specified goal type
        if "tags" not in kwargs:
            kwargs["tags"] = {}
        kwargs["tags"]["goal_type"] = goal_type

        # Set up rubrics.Goal stuff
        super().__init__(
            taskid,
            "tests:" + identifier,
            description,
            **kwargs
        )
```
In addition to a task ID, an identifier, and a description, a goal type may be supplied other than the default "testing".
The categorizer "tests:" will be prepended to the given identifier.
The slots required should be given as uses_slots, and a relevant context will be selected or created as the testing context. By default the "validation_test_cases" slot is the only one used.
Any extra arguments are passed through to the `rubrics.Goal` constructor.
```python
    def check(self, context):
        raise NotImplementedError(
            "CasesTest is an abstract class that can't be used"
            " directly."
        )
```
Not implemented; override to define specific tests.
```python
    def evaluate_in_context(self, context=None):
        context = context or {}

        try:
            self.result = self.check(context)

            if self.result is None:
                raise ValueError(
                    f"Test case check for {self.__class__.__name__}"
                    f" returned None!"
                )
        except Exception:
            self.result = {
                "status": "failed",
                "traceback": html_tools.html_traceback(
                    linkable=context_utils.linkmap(context)
                )
            }
            self.set_explanation(
                context,
                status="crash",
                default=html_tools.html_traceback(
                    title="Error while checking your test cases.",
                    linkable=context_utils.linkmap(context)
                )
            )
            return self.result

        self.set_explanation(
            context,
            default=self.result["explanation"]
        )

        return self.result
```
Runs the checker and returns its result.
```python
class DefinesEnoughTests(CasesTest):
```
A test-case checker which ensures that, for each of certain listed functions (or files), a certain number of distinct test cases are established (using the `optimism` module).
Note that functions are specified by name to be matched against __name__ attributes of actual functions checked, so if you're testing methods you just use the method name, and testing decorated functions may be tricky. (TODO: Check if this plays nicely with spec-specified decorations.)
Test cases are counted as distinct if either their arguments or their provided inputs differ.
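For instance, under this counting rule (and again assuming `optimism`'s `testFunction`/`case` interface), the sketch below establishes two distinct cases and one duplicate for the hypothetical function `double`:

```python
import optimism as opt


def double(x):   # hypothetical function under test
    return x * 2


tester = opt.testFunction(double)
tester.case(1).checkReturnValue(2)   # distinct case (arguments: 1)
tester.case(2).checkReturnValue(4)   # distinct case (arguments: 2)
tester.case(1).checkReturnValue(2)   # same arguments and inputs: counted as a duplicate
```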
```python
    def __init__(self, taskid, function_reqs, file_reqs, **kwargs):
        self.function_reqs = function_reqs
        self.file_reqs = file_reqs

        # Check types for function requirements keys and values
        for fname in function_reqs:
            if not isinstance(fname, str):
                raise TypeError(
                    (
                        "Each function requirement must be a string."
                        " (You used {} as a key, which is a {})."
                    ).format(
                        repr(fname),
                        type(fname)
                    )
                )

            val = function_reqs[fname]
            if not isinstance(val, int):
                raise TypeError(
                    (
                        "Each function requirement must use an integer"
                        " as the value. (requirement with key {} had"
                        " value {} which is a {})."
                    ).format(
                        repr(fname),
                        repr(val),
                        type(val)
                    )
                )

        # Check types for file requirements keys and values
        for filename in file_reqs:
            if not isinstance(filename, str):
                raise TypeError(
                    (
                        "Each file requirement must be a string."
                        " (You used {} as a key, which is a {})."
                    ).format(
                        repr(filename),
                        type(filename)
                    )
                )

            val = file_reqs[filename]
            if not isinstance(val, int):
                raise TypeError(
                    (
                        "Each file requirement must use an integer as"
                        " the value. (requirement with key {} had"
                        " value {} which is a {})."
                    ).format(
                        repr(filename),
                        repr(val),
                        type(val)
                    )
                )

        # Check if optimism is available
        try:
            import optimism  # noqa F401
        except Exception:
            raise NotImplementedError(
                "DefinesEnoughTests cannot be used because the"
                " 'optimism' module cannot be imported."
            )

        # Set automatic description
        if "description" not in kwargs:
            rlist = [
                "Function <code>{}</code>: {} cases".format(
                    fn,
                    required
                )
                for fn, required in self.function_reqs.items()
            ] + [
                "File '{}': {} cases".format(
                    filename,
                    required
                )
                for filename, required in self.file_reqs.items()
            ]
            kwargs["description"] = (
                "Defines required test cases",
                (
                    """\
Your code must use the <code>optimism</code> module to create a certain
number of test cases which use the following functions/files. Test cases
that are the same as each other (same arguments and/or inputs) don't
count. (Each test case must include at least one check).\n"""
                    + html_tools.build_list(rlist)
                )
            )

        super().__init__(taskid, "defines_enough", **kwargs)
```
A task ID is required. The other required arguments are two dictionaries mapping function name strings and then filename strings to integers specifying how many tests are required.
Other arguments get passed through to `CasesTest` and potentially thence to `rubrics.Goal`.
The identifier will be "defines_enough".
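A usage sketch based on the constructor signature above (the task ID, function name, filename, and counts are hypothetical):

```python
from potluck.validation import DefinesEnoughTests

# Require two distinct cases targeting the function 'process' and one case
# targeting the file 'data_report.py'.
goal = DefinesEnoughTests(
    "taskA",
    {"process": 2},          # function_reqs
    {"data_report.py": 1}    # file_reqs
)
```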
```python
    def check(self, context):
        try:
            import optimism
        except Exception:
            raise NotImplementedError(
                "Cannot check for test cases because optimism cannot be"
                " imported."
            )
        cases = context_utils.extract(context, "validation_test_cases")
        by_fn = {}
        by_file = {}
        for case in cases:
            # Skip test cases that have not been checked
            if len(case.outcomes) == 0:
                continue

            # Categorize by function/file tested
            if issubclass(case.manager.case_type, optimism.FunctionCase):
                fname = case.manager.target.__name__
                add_to = by_fn.setdefault(fname, [])

                # Don't record duplicate cases
                duplicate = False
                for recorded in add_to:
                    if (
                        case.args == recorded.args
                        and case.kwargs == recorded.kwargs
                        and case.inputs == recorded.inputs
                    ):
                        duplicate = True
                        break

                # Record this case
                if not duplicate:
                    add_to.append(case)

            elif issubclass(case.manager.case_type, optimism.FileCase):
                add_to = by_file.setdefault(case.manager.target, [])

                # Don't record duplicate cases
                duplicate = False
                for recorded in add_to:
                    if (
                        case.args == recorded.args
                        and case.kwargs == recorded.kwargs
                        and case.inputs == recorded.inputs
                    ):
                        duplicate = True
                        break

                # Record this case
                if not duplicate:
                    add_to.append(case)

            # Note that we ignore other kinds of cases including block
            # cases, which would be hard to count/require...

        any_tests = False
        deficient = False
        reports = []
        for req_file, required in self.file_reqs.items():
            cases = by_file.get(req_file, [])
            count = len(cases)

            if count > 0:
                any_tests = True

            if count < required:
                deficient = True
                symbol = '✗'
            else:
                symbol = '✓'

            reports.append(
                f"{symbol} '{req_file}': {count} / {required}"
            )

        for req_fn, required in self.function_reqs.items():
            cases = by_fn.get(req_fn, [])
            count = len(cases)

            if count > 0:
                any_tests = True

            if count < required:
                deficient = True
                symbol = '✗'
            else:
                symbol = '✓'

            reports.append(
                f"{symbol} <code>{req_fn}</code>: {count} / {required}"
            )

        if not any_tests:
            return {
                "status": "failed",
                "explanation": (
                    "Running your module did not establish any test"
                    " cases for required functions or files."
                )
            }
        elif deficient:
            return {
                "status": "partial",
                "explanation": (
                    "Your module did not establish as many test cases as"
                    " were required for all functions/files:\n"
                ) + html_tools.build_list(reports)
            }
        else:
            return {
                "status": "accomplished",
                "explanation": (
                    "Your module established enough test cases for each"
                    " function or file it was required to test."
                )
            }
```
Looks for an adequate number of established test cases in the given context that have recorded checks.
```python
def list_case_outcomes(cases):
    items = []
    for case in cases:
        for (passed, tag, message) in case.outcomes:
            short_tag = tag.split('/')[-1]
            message = html_tools.escape(message)
            lines = message.splitlines()
            lines[0] = lines[0][:2] + lines[0].split('/')[-1]
            message = html_tools.wrap_text_with_indentation(
                '\n'.join(lines)
            )
            items.append(f"✗ {short_tag}<br><pre>{message}</pre>")
    return html_tools.build_list(items)
```
Creates an HTML list out of test case objects.
```python
class ChecksSucceed(CasesTest):
```
A test-case checker which ensures that each recorded outcome for each established test case in the submitted testing module is a success.
Note that when this goal is checked during validation, tests in the "validation_test_cases" slot have been run against the solution code, whereas when this goal is used during evaluation, those same test cases have been run against the student's submitted code.
TODO: Manage multi-file submission and/or test file copying so that "validation_test_cases" is actually available during evaluation.
```python
    def __init__(self, taskid, **kwargs):
        try:
            import optimism  # noqa F401
        except Exception:
            raise NotImplementedError(
                "ChecksSucceed cannot be used because the 'optimism'"
                " module cannot be imported."
            )

        if "description" not in kwargs:
            kwargs["description"] = (
                (
                    "All checks must succeed"
                ),
                (
                    "Every time your code checks a test case using the"
                    " <code>optimism</code> module the check must"
                    " succeed."
                )
            )

        super().__init__(taskid, "checks_succeeded", **kwargs)
```
A task ID is required. Arguments are passed through to `CasesTest`.
The identifier will be "checks_succeeded".
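A usage sketch (the task ID is hypothetical); this goal complements `DefinesEnoughTests` when both requirements apply:

```python
from potluck.validation import ChecksSucceed

goal = ChecksSucceed("taskA")
```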
```python
    def check(self, context):
        cases = context_utils.extract(context, "validation_test_cases")
        any_failed = False
        any_passed = False
        failing = []
        for case in cases:
            failed_here = False
            for (succeeded, tag, msg) in case.outcomes:
                if succeeded:
                    any_passed = True
                else:
                    failed_here = True

            if failed_here:
                any_failed = True
                failing.append(case)

        if any_failed:
            fail_list = list_case_outcomes(failing)
            if any_passed:
                return {
                    "status": "partial",
                    "explanation": (
                        "Some of your code's checks failed:\n"
                    ) + fail_list
                }
            else:
                return {
                    "status": "failed",
                    "explanation": (
                        "None of your code's checks succeeded:\n"
                    ) + fail_list
                }
        else:
            if any_passed:
                return {
                    "status": "accomplished",
                    "explanation": (
                        "All of your code's checks succeeded."
                    )
                }
            else:
                return {
                    "status": "failed",
                    "explanation": (
                        "Your code did not check any test cases."
                    )
                }
```
Looks for any failed outcomes in test cases within the given context.
```python
def check_tests_harness(
    function,
    *args,
    _req_cases=None,
    _must_pass=True,
    **kwargs
):
    # Check if optimism is available
    try:
        import optimism  # noqa F401
    except Exception:
        raise NotImplementedError(
            "check_tests_harness cannot be used because the"
            " 'optimism' module cannot be imported."
        )

    # First clean up any existing tests
    optimism.deleteAllTestSuites()

    # Run the function, ignoring its result
    function(*args, **kwargs)

    # List all currently defined test cases (i.e., those defined by the
    # function we're looking at)
    defined = optimism.listAllTrials()

    report = ""

    # Check each defined case and create a map of the number of passing
    # and failing cases for each function/file/block tested; as a side
    # effect add lines to the report detailing any failing cases if
    # _must_pass is set to "all".
    caseMap = {}
    for case in defined:
        # Figure out the case ID
        if isinstance(case.manager, optimism.FunctionManager):
            case_id = case.manager.target.__name__
            show_case_id = "function:" + case_id
        elif isinstance(case.manager, optimism.FileManager):
            case_id = "file:" + case.manager.target
            show_case_id = case_id
        elif isinstance(case.manager, optimism.BlockManager):
            case_id = "block:" + case.manager.target
            show_case_id = "block:" + repr(case.manager.target)
        else:
            case_id = None
            show_case_id = "unknown"

        caseMap.setdefault(case_id, [show_case_id, 0, 0])

        # Go through each outcome
        n_failed = 0
        n_checks = 0
        for passed, _, _ in case.outcomes:
            n_checks += 1
            if not passed:
                n_failed += 1

        if n_checks > 0 and n_failed == 0:
            # All checks passed, and there was at least one
            # This counts as a passing case
            caseMap[case_id][1] += 1

        elif n_failed > 0:
            # some checks failed
            # Record the failure
            caseMap[case_id][2] += 1
            if _must_pass == "all":
                # Note failure in our report, but don't include specific
                # line numbers, since those might differ between
                # submitted and solution files
                report += (
                    f"{n_failed} checks failed for test(s) of"
                    f" {show_case_id}\n"
                )

    # Check that the required number of cases are present
    if _req_cases is None:
        # Report on every defined test
        for (case_id, (show_case_id, succeeded, failed)) in caseMap.items():
            # Skip cases where no checks were performed
            if succeeded + failed == 0:
                continue

            if _must_pass is True and succeeded == 0:
                # if _must_pass is 'all' we've already reported failures
                report += (
                    f"{failed} {phrasing.plural(failed, 'check')} failed"
                    f" for test(s) of {show_case_id}\n"
                )
            elif _must_pass:
                # report success
                report += (
                    f"At least one check succeeded for test(s) of"
                    f" {show_case_id}\n"
                )
            else:
                # must_pass must be False, so we just report that checks
                # were defined regardless of success/failure
                report += (
                    f"Performed at least one check for test(s) of"
                    f" {show_case_id}\n"
                )
    else:
        # Just report on required tests
        for req, threshold in _req_cases.items():
            show_case_id, succeeded, failed = caseMap.get(
                req,
                [repr(req), 0, 0]
            )  # TODO: More elegant here?
            if _must_pass:
                if succeeded >= threshold:
                    cases_passed = phrasing.plural(
                        threshold,
                        'case passed',
                        'cases passed'
                    )
                    report += (
                        f"At least {threshold} {cases_passed} for"
                        f" test(s) of {show_case_id}\n"
                    )
                else:
                    cases_passed = phrasing.plural(
                        succeeded,
                        'case passed',
                        'cases passed'
                    )
                    total = succeeded + failed
                    if total == succeeded:
                        cases_passed = phrasing.plural(
                            total,
                            'case was defined',
                            'cases were defined'
                        )
                    only = "Only " if succeeded > 0 else ""
                    out_of = f"/{total}" if total > succeeded else ""
                    report += (
                        f"{only}{succeeded}{out_of} {cases_passed} for"
                        f" test(s) of {show_case_id} ({threshold} were"
                        f" required)\n"
                    )
            else:
                if succeeded + failed >= threshold:
                    cases_were = phrasing.plural(
                        threshold,
                        'case was',
                        'cases were'
                    )
                    report += (
                        f"At least {threshold} {cases_were} defined"
                        f" for {show_case_id}\n"
                    )
                else:
                    total = succeeded + failed
                    cases_were = phrasing.plural(
                        total,
                        'case was',
                        'cases were'
                    )
                    only = "Only " if total > 0 else ""
                    report += (
                        f"{only}{total} {cases_were} defined for"
                        f" {show_case_id} ({threshold}"
                        f" {phrasing.plural(threshold, 'was', 'were')}"
                        f" required)\n"
                    )

    # We return our report, to be compared with the same report when run
    # against the solution code
    return report
```
A test harness (to be used with `potluck.specifications.test_with_harness`) which will return a string reporting on the aggregate behavior of `optimism` tests that were defined and checked as a result of running a particular function. A minimum number of distinct `optimism` test cases can be required for each of certain target functions, and those test cases can be required to pass all checks applied (this second requirement can be skipped by setting `_must_pass` to `False`).

If `_must_pass` is set to the string "all", then all tests must pass, even if more than the required number of tests are defined; otherwise enough tests must pass (i.e., have been checked at least once and have succeeded on every check applied) to meet the minimum requirements, but cases beyond those are allowed to fail. If `_must_pass` is set to the string "not all", then at least one test must fail, but the specific number of successes/failures is not reported.
Note that this function has a side effect of deleting all previously-defined optimism tests.
The `_req_cases` argument must be a dictionary mapping function names to integers specifying how many distinct tests are required for that function. Tests for files can be required by prepending 'file:' to the filename, and code block tests can be required by prepending 'block:' to the exact code block string (but that's quite fragile). If `_req_cases` is None (the default) then the report will include information on all defined tests.

As a harness function, most arguments are passed through to whatever function is being tested; if that function has arguments named `_req_cases` and/or `_must_pass`, you'll have to define your own custom harness that uses different keyword argument names. Because positional arguments are passed through, these two meta-parameters must be given as keyword arguments.

Note that technically, if the solution code has failing test cases and `_must_pass` is set to "all", the reports produced will be the same if the submitted code fails the same number of test cases.
(Note: these docstring paragraphs will be used as the default goal description...)
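For illustration, a hedged sketch of calling the harness directly; the submitted function, the requirement key, and the count below are hypothetical, and wiring this up through `potluck.specifications.test_with_harness` would use the same keyword arguments:

```python
import optimism as opt

from potluck.validation import check_tests_harness


def run_my_tests():
    # Hypothetical stand-in for a submitted function that defines and checks
    # optimism cases when it runs (assuming optimism's testFunction/case API).
    tester = opt.testFunction(len)
    tester.case("abc").checkReturnValue(3)
    tester.case("hello").checkReturnValue(5)


report = check_tests_harness(
    run_my_tests,
    _req_cases={"len": 2},   # require two distinct cases for the function 'len'
    _must_pass=True
)
print(report)   # compared against the same report generated from the solution
```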
```python
def tests_report_description(target_fn, _req_cases=None, _must_pass=True):
    if _req_cases is None:
        if _must_pass == "all":
            return (
                (
                    "Must define and successfully check"
                    " <code>optimism</code> test cases for the correct"
                    " functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. The number of test cases that fail at"
                    " least one check must match the solution results"
                    " (usually this means no check should fail)."
                )
            )
        elif _must_pass is True:
            return (
                (
                    "Must define and check <code>optimism</code> test"
                    " cases for the correct functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. At least one check must succeed for"
                    " each test case defined by the solution code."
                )
            )
        else:
            return (
                (
                    "Must define and check <code>optimism</code> test"
                    " cases for the correct functions."
                ),
                (
                    "Your code must define and check"
                    " <code>optimism</code> test cases for each"
                    " function, file, or code block that the solution"
                    " code does. It does not matter if the checks"
                    " succeed or fail as long as at least one check is"
                    " performed per test case."
                )
            )
    else:
        # Build a list of strings describing per-case-id requirements
        checklist = []
        for req, threshold in _req_cases.items():
            if req.startswith('block:'):
                show_case = (
                    f"the code block <pre><code>{req[6:]}</code></pre>"
                )
            elif req.startswith('file:'):
                show_case = f"the file '{req[5:]}'"
            else:
                show_case = f"the function {req}"

            if _must_pass:
                checklist.append(
                    f"All checks must pass for at least {threshold}"
                    f" test {phrasing.plural(threshold, 'case')} for"
                    f" {show_case}."
                )
            else:
                checklist.append(
                    f"At least {threshold} test"
                    f" {phrasing.plural(threshold, 'case')} for"
                    f" {show_case} must be defined, and each must"
                    f" include at least one check (which does not have"
                    f" to succeed)."
                )

        # Construct detail text
        details = ""
        if checklist:
            details += (
                f"The following test case(s) must be established by your"
                f" <code>{target_fn}</code> function and/or must"
                f" succeed:"
            )
            details += html_tools.build_list(checklist)

        elif _must_pass != "all":
            # If there are no listed checks, but _req_cases is not None,
            # you'll need to craft a custom description yourself
            raise ValueError(
                "_req_cases did not include any required test cases. You"
                " should fix that or use a custom description."
            )

        if _must_pass == "all":
            details += (
                "The same number of checks (usually zero) must fail for"
                " the same test cases as the solution code."
            )

        return (
            (
                f"Your <code>{target_fn}</code> function must establish"
                f" the correct test cases."
            ),
            details
        )
```
Returns a goal description tuple suitable for use with `specifications.HasGoal.set_goal_description` when `test_with_harness` has been used to set up `check_tests_harness` as the testing harness. Pass the same target function and keyword arguments used with the test harness (i.e., which were included in the test case).
TODO: Option for generic version when multiple test cases are grouped?
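A matching sketch for the goal description; the arguments mirror the (hypothetical) harness call above, and the resulting tuple is intended for `specifications.HasGoal.set_goal_description` as described:

```python
from potluck.validation import tests_report_description

# Use the same target function name and keyword arguments that were used
# with check_tests_harness (names and counts are hypothetical).
title, details = tests_report_description(
    "run_my_tests",
    _req_cases={"len": 2},
    _must_pass=True
)
```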