potluck.compare
Functions for comparing Python values, and for rendering the results of those comparisons as HTML.
compare.py
1""" 2Functions for comparing Python values, and for rendering the results of 3those comparisons as HTML. 4 5compare.py 6""" 7 8import re 9import math 10import cmath 11import difflib 12 13from . import html_tools 14from . import phrasing 15 16 17#-----------------# 18# Strict equality # 19#-----------------# 20 21def report_unequal(val, ref): 22 """ 23 Returns a status/explanation dictionary reporting that two values 24 are unequal, including a report on their differences, which has 25 different forms depending on the values themselves. 26 """ 27 if isinstance(val, str) and isinstance(ref, str): 28 val_lines = val.splitlines(keepends=True) 29 ref_lines = ref.splitlines(keepends=True) 30 31 diff = html_tools.html_diff_table( 32 val_lines, 33 ref_lines, 34 'Actual value', 35 'Corresponding lines (if any) in expected value' 36 ) 37 else: 38 rval = repr(val) 39 rexp = repr(ref) 40 if max(len(rval), len(rexp)) < 50: 41 diff = "Actual: {}<br>\nExpected: {}".format( 42 repr(val), 43 repr(ref) 44 ) 45 else: 46 diff = "Actual:<br>\n{}<br><br>\nExpected:<br>\n{}".format( 47 html_tools.dynamic_html_repr(val), 48 html_tools.dynamic_html_repr(ref) 49 ) 50 51 return { 52 "status": "failed", 53 "explanation": ( 54 "Actual value is different from the reference" 55 " value:<br>\n{}" 56 ).format(diff) 57 } 58 59 60def report_equal(val, ref): 61 """ 62 Returns a status/explanation dictionary reporting that two values 63 are equivalent. 
64 """ 65 val_repr = repr(val) 66 if isinstance(val, str) and '\n' in val: 67 val_repr = "'''\\\n{}'''".format(val) 68 return { 69 "status": "accomplished", 70 "explanation": ( 71 "Actual value is the same as the reference value:<br>\n{}" 72 ).format( 73 html_tools.html_output_details( 74 val_repr, # TODO: Pretty printing here 75 "Value tested" 76 ) 77 ) 78 } 79 80 81def strict_equality_checker(val, ref, memo=None): 82 """ 83 A checker to be used with OutputTest or ValueTest goals that ensures 84 the output or value being checked is strictly equal to the reference 85 output or value. If there is a difference, some kind of diff is 86 printed, depending on the types of the values. 87 88 Handles recursive structures composed of tuples, lists, sets, and/or 89 dictionaries, but may choke on recursive structures stored in custom 90 objects, in which case those objects will only be considered equal 91 if they are the same object. 92 """ 93 if memo is None: 94 memo = set() 95 96 mkey = (id(val), id(ref)) 97 # If we're already in the process of comparing these two objects to 98 # each other, and we come across another comparison of the two, we 99 # can safely return True for the sub-comparison, and let other 100 # comparisons determine if they're equal or not. 101 if mkey in memo: 102 return { 103 "status": "accomplished", 104 "explanation": "Recursive comparison." 
105 } 106 107 # Past this point, recursion might happen 108 memo.add(mkey) 109 110 if type(val) != type(ref): 111 return report_unequal(val, ref) 112 elif isinstance(val, (int, float, complex, type(None), bool, str)): 113 if val == ref: 114 return report_equal(val, ref) 115 else: 116 return report_unequal(val, ref) 117 elif isinstance(val, (tuple, list)): 118 if len(val) != len(ref): 119 return report_unequal(val, ref) 120 else: 121 test = all( 122 strict_equality_checker(v, r, memo)\ 123 ["status"] == "accomplished" # noqa E211 124 for (v, r) in zip(val, ref) 125 ) 126 if test: 127 return report_equal(val, ref) 128 else: 129 return report_unequal(val, ref) 130 elif isinstance(val, (set)): 131 sv = sorted(val) 132 sr = sorted(ref) 133 test = strict_equality_checker(sv, sr, memo) 134 if test["status"] == "accomplished": 135 return report_equal(val, ref) 136 else: 137 return report_unequal(val, ref) 138 elif isinstance(val, (dict)): 139 test = strict_equality_checker( 140 sorted(val.keys()), 141 sorted(ref.keys()), 142 memo 143 ) 144 if test["status"] != "accomplished": 145 return report_unequal(val, ref) 146 147 failed = False 148 for key in val: # keys must be same at this point 149 test = strict_equality_checker(val[key], ref[key], memo) 150 if test["status"] != "accomplished": 151 failed = True 152 break 153 154 if failed: 155 return report_unequal(val, ref) 156 else: 157 return report_equal(val, ref) 158 else: 159 # Some kind of object that we don't know about; try calling 160 # __eq__ if it has it, an upon a recursion error, simply 161 # compare IDs (which is Python default behavior when __eq__ 162 # isn't present). 
163 if hasattr(val, "__eq__"): 164 try: 165 test = val == ref 166 except RecursionError: 167 test = val is ref 168 else: 169 test = val is ref 170 171 if test: 172 return report_equal(val, ref) 173 else: 174 return report_unequal(val, ref) 175 176 177#---------------------------# 178# Floating-point comparison # 179#---------------------------# 180 181def round_to_sig_figs(n, figs=5): 182 """ 183 Rounds to the given number of significant figures (not decimal 184 places). Computes the location of the first significant figure, and 185 calls round with an appropriate (and possibly negative) precision 186 value. Still subject to the limits of floating point numbers for very 187 small values. 188 """ 189 if n == 0: 190 return 0 191 exp = math.floor(math.log10(abs(n))) 192 round_to = figs - exp 193 return round(n, round_to) 194 195 196def build_float_equality_checker(sig_figs=5, use_decimal_places=False): 197 """ 198 Builds and returns a checker function that tests whether the 199 submitted value (a float) is equal to the reference value (also a 200 float) up to the given number of significant figures (to avoid 201 penalizing students for floating point rounding issues if they did 202 math operations in a different order from the solution. The 203 significant figures are actually treated as such, not as decimal 204 places, using round_to_sig_figs. This behavior is altered if 205 use_decimal_places is True, in which case sig_figs is treated not as 206 a number of significant figures but as a number of decimal places to 207 round to. 208 """ 209 def check_float_equality(val, ref): 210 """ 211 Checks floating-point equality after rounding to a certain number 212 of significant figures (or possibly decimal places). 
213 """ 214 if use_decimal_places: 215 rval = round(val, sig_figs) 216 rref = round(ref, sig_figs) 217 else: 218 rval = round_to_sig_figs(val, sig_figs) 219 rref = round_to_sig_figs(ref, sig_figs) 220 221 if rval == rref: 222 return { 223 "status": "accomplished", 224 "explanation": ( 225 "Value {} matches expected value to the required" 226 + " precision." 227 ).format(val) 228 } 229 else: 230 return { 231 "status": "failed", 232 "explanation": ( 233 "Value {} does not match expected value {} to the " 234 + "required precision. Compared as:<br>\n" 235 + "{} (value)<br>\n" 236 + "{} (expected)" 237 ).format(val, ref, rval, rref) 238 } 239 240 return check_float_equality 241 242 243FLOAT_REL_TOLERANCE = 1e-8 244""" 245The relative tolerance for floating-point similarity (see 246`cmath.isclose`). 247""" 248 249FLOAT_ABS_TOLERANCE = 1e-8 250""" 251The absolute tolerance for floating-point similarity (see 252`cmath.isclose`). 253""" 254 255 256def numbers_are_equalish(val, ref): 257 """ 258 Uses `cmath.isclose` to compare two floating-point numbers, and 259 returns an evaluation result (a dictionary w/ status and explanation 260 slots). 261 262 The first number should be from the submitted code and the second 263 should be the correct value. 264 """ 265 if cmath.isclose( 266 val, 267 ref, 268 rel_tol=FLOAT_REL_TOLERANCE, 269 abs_tol=FLOAT_ABS_TOLERANCE 270 ): 271 return { 272 "status": "accomplished", 273 "explanation": ( 274 f"Value {val} matches expected value to the required" 275 f" precision." 276 ) 277 } 278 else: 279 return { 280 "status": "failed", 281 "explanation": ( 282 f"Value {val} does not match expected value {ref} to the" 283 f" required precision." 284 ) 285 } 286 287 288#-------------------# 289# String comparison # 290#-------------------# 291 292def multiline_strings_are_exactly_equal(val, ref): 293 """ 294 A checker to be used with harnesses which generate multi-line strings 295 where the outputs can reasonable be expected to be exactly equal. 
296 Displays results more nicely than `omni_compare` because it knows 297 they'll be strings. 298 """ 299 if not isinstance(val, str) or not isinstance(ref, str): 300 raise TypeError( 301 ( 302 "Can't check string equality for non-string value(s):\n" 303 + "{}\nand\n{}" 304 ).format(repr(val), repr(ref)) 305 ) 306 307 if val == ref: 308 return { 309 "status": "accomplished", 310 "explanation": "Values are the same.<br>\n{}".format( 311 html_tools.html_output_details(val, "Value tested") 312 ) 313 } 314 else: 315 val_lines = val.splitlines(keepends=True) 316 ref_lines = ref.splitlines(keepends=True) 317 diffTable = html_tools.html_diff_table( 318 val_lines, 319 ref_lines, 320 "Actual result", 321 "Corresponding lines (if any) in expected result" 322 ) 323 return { 324 "status": "failed", 325 "explanation": "Values are different.<br>\n{}".format( 326 diffTable 327 ) 328 } 329 330 331def strings_are_equal_modulo_whitespace(val, ref, ignore_case=True): 332 """ 333 A checker to be used with OutputTest or ValueTest goals that checks 334 whether the value and the reference value are equivalent strings if 335 whitespace is ignored completely. Fails if either value isn't a 336 string. Ignores case by default. 
337 """ 338 trans = {ord(c): '' for c in ' \t\n\r'} 339 if not isinstance(val, str) or not isinstance(ref, str): 340 raise TypeError( 341 ( 342 "Can't check string equality for non-string value(s):\n" 343 + "{}\nand\n{}" 344 ).format(repr(val), repr(ref)) 345 ) 346 347 val_collapsed = val.translate(trans) 348 ref_collapsed = ref.translate(trans) 349 350 if ignore_case: 351 val_collapsed = val_collapsed.casefold() 352 ref_collapsed = ref_collapsed.casefold() 353 354 if val == ref: 355 return { 356 "status": "accomplished", 357 "explanation": "Strings are the same.<br>\n{}".format( 358 html_tools.html_output_details(val, "Value tested") 359 ) 360 } 361 elif val_collapsed == ref_collapsed: 362 return { 363 "status": "accomplished", 364 "explanation": ( 365 "Strings are the same (ignoring whitespace).<br>\n{}" 366 ).format( 367 html_tools.html_output_details( 368 val_collapsed, 369 "Simplified value" 370 ) 371 ) 372 } 373 else: 374 val_lines = val.splitlines(keepends=True) 375 ref_lines = ref.splitlines(keepends=True) 376 377 diffTable = html_tools.html_diff_table( 378 val_lines, 379 ref_lines, 380 'Actual output', 381 'Corresponding lines (if any) in expected output' 382 ) 383 384 return { 385 "status": "failed", 386 "explanation": ( 387 "Actual value is different from the reference value:<br>\n{}" 388 ).format(diffTable) 389 } 390 391 392def without_trailing_whitespace(s): 393 """ 394 Returns the given string with any trailing whitespace removed. 395 Returns an empty string if given only whitespace. 396 """ 397 trailing = 0 398 for c in s: 399 if re.match(r'\s', c): # it's whitespace 400 trailing += 1 401 else: 402 trailing = 0 403 404 if trailing > 0: 405 return s[:-trailing] 406 else: 407 return s 408 409 410def without_trailing_whitespace_per_line(s): 411 """ 412 Returns a version of the given multi-line string where trailing 413 whitespace has been stripped from each line. 
414 """ 415 return '\n'.join( 416 without_trailing_whitespace(line) 417 for line in s.split('\n') 418 ) 419 420 421def compress_line(s): 422 """ 423 Compresses internal whitespace and strips trailing whitespace from a 424 single line of text. 425 """ 426 w = without_trailing_whitespace(s) 427 return re.sub(r'\s+', ' ', w) 428 429 430def compress_whitespace(s): 431 """ 432 Returns a version of the given string where all runs of one or more 433 whitespace characters on each line have been replaced with a single 434 space character, and all trailing whitespace has been stripped from 435 each line, plus any blank lines have been removed. 436 """ 437 return '\n'.join( 438 filter( 439 lambda x: x != '', 440 [ 441 compress_line(line) 442 for line in s.split('\n') 443 ] 444 ) 445 ) 446 447 448def strings_are_equal_modulo_most_whitespace( 449 val, 450 ref, 451 track_internal_whitespace=False, 452 ignore_case=True 453): 454 """ 455 A checker to be used with OutputTest or ValueTest goals that checks 456 whether the value and the reference value are exactly equivalent 457 strings after any trailing and extra internal whitespace is deleted. 458 Fails if either value isn't a string. 459 460 if track_internal_whitespace is True, will be only partially 461 successful if there are differences in non-trailing whitespace or 462 blank lines. 463 464 By default, case is ignored. 
465 """ 466 if not isinstance(val, str) or not isinstance(ref, str): 467 raise TypeError( 468 ( 469 "Can't check string equality for non-string value(s):\n" 470 + "{}\nand\n{}" 471 ).format(repr(val), repr(ref)) 472 ) 473 474 if ignore_case: 475 val_stripped = without_trailing_whitespace_per_line(val).casefold() 476 ref_stripped = without_trailing_whitespace_per_line(ref).casefold() 477 else: 478 val_stripped = without_trailing_whitespace_per_line(val) 479 ref_stripped = without_trailing_whitespace_per_line(ref) 480 481 val_compressed = compress_whitespace(val_stripped) 482 ref_compressed = compress_whitespace(ref_stripped) 483 484 if val == ref: # full equality 485 return { 486 "status": "accomplished", 487 "explanation": "Strings are the same.<br>\n{}".format( 488 html_tools.html_output_details(val, "Value tested") 489 ) 490 } 491 elif val_stripped == ref_stripped: # equal modulo trailing whitespace 492 return { 493 "status": "accomplished", 494 "explanation": ( 495 "Strings are the same (ignoring trailing whitespace).<br>\n{}" 496 ).format( 497 html_tools.html_output_details( 498 val_stripped, 499 "Simplified value" 500 ) 501 ) 502 } 503 elif val_compressed == ref_compressed: # equal when compressed 504 if track_internal_whitespace: 505 val_lines = val.splitlines(keepends=True) 506 ref_lines = ref.splitlines(keepends=True) 507 508 diffTable = html_tools.html_diff_table( 509 val_lines, 510 ref_lines, 511 "Actual output", 512 "Corresponding lines (if any) in expected output" 513 ) 514 return { 515 "status": "partial", 516 "explanation": ( 517 "Strings aren't the same, but all differences are just" 518 + " in whitespace:<br>\n{}" 519 ).format(diffTable) 520 } 521 else: 522 return { 523 "status": "accomplished", 524 "explanation": ( 525 "Strings are the same (ignoring most whitespace).<br>\n{}" 526 ).format( 527 html_tools.html_output_details( 528 val_compressed, 529 "Simplified value" 530 ) 531 ) 532 } 533 else: 534 val_lines = val.splitlines(keepends=True) 535 
ref_lines = ref.splitlines(keepends=True) 536 537 diffTable = html_tools.html_diff_table( 538 val_lines, 539 ref_lines, 540 "Actual output", 541 "Corresponding lines (if any) in expected output" 542 ) 543 544 return { 545 "status": "failed", 546 "explanation": ( 547 "Actual value is different from the reference value:<br>\n{}" 548 ).format(diffTable) 549 } 550 551 552word_re = re.compile(r"[\w']+") 553 554 555def clean_word_list(line): 556 """ 557 Takes a single line of output and produces a list of words, which are 558 taken to be strictly contiguous sequences of characters in the \\w 559 class, plus apostrophe. 560 """ 561 return word_re.findall(line) 562 563 564def rough_sequence_contains( 565 val_lines, 566 ref_lines, 567 sequence_match_threshold, 568 partial_sequence_threshold, 569 line_match_threshold, 570): 571 """ 572 Returns the string "full" if when diffing val_lines and ref_lines, 573 the percentage of lines from ref_lines that are matched is at or 574 above the sequence_match_threshold, and the string "partial" if that 575 threshold isn't met bu the partial_sequence_threshold is. Returns None 576 if neither threshold is met. 577 578 Adjacent delete/add pairs where the deleted line has a difflib 579 sequence similarity ratio of at least the line_match_threshold are 580 considered matching lines. 581 """ 582 differ = difflib.Differ() 583 comparison = [ 584 line 585 for line in differ.compare(val_lines, ref_lines) 586 if line[:2] != '? 
' # filter out comment lines 587 ] 588 589 # chunk the diff to group sequences of additions and deletions 590 chunks = [] 591 while comparison: 592 if comparison[0][:2] == ' ': # grab a chunk of matches 593 i = 0 594 while i < len(comparison) and comparison[i][:2] == ' ': 595 i += 1 596 matching_block = comparison[:i] 597 comparison = comparison[i:] 598 chunks.append(('match', matching_block)) 599 600 elif comparison[0][:2] == '- ': # grab a chunk of deletions 601 i = 0 602 while i < len(comparison) and comparison[i][:2] == '- ': 603 i += 1 604 # If next thing is a + that matches the last -, don't include 605 # the last - 606 if ( 607 i < len(comparison) 608 and comparison[i][:2] == '+ ' 609 and difflib.SequenceMatcher( 610 None, 611 comparison[i - 1][2:], 612 comparison[i][2:] 613 ).ratio() >= line_match_threshold # noqa: E123 614 ): 615 missing_block = comparison[:i - 1] 616 paired_block = comparison[i - 1:i + 1] 617 comparison = comparison[i + 1:] 618 else: 619 missing_block = comparison[:i] 620 paired_block = None 621 comparison = comparison[i:] 622 if missing_block: # might be empty list 623 chunks.append(('missing', missing_block)) 624 if paired_block: # might be None 625 chunks.append(('paired', paired_block)) 626 627 elif comparison[0][:2] == '+ ': # grab a chunk of additions 628 i = 0 629 while i < len(comparison) and comparison[i][:2] == '+ ': 630 i += 1 631 added_block = comparison[:i] 632 comparison = comparison[i:] 633 chunks.append(('added', added_block)) 634 635 # Count matched + unmatched lines 636 matched = 0 637 unmatched = 0 638 for chtype, lines in chunks: 639 if chtype == 'match': 640 matched += len(lines) 641 elif chtype == 'paired': 642 matched += 1 643 elif chtype == 'added': 644 unmatched += len(lines) 645 646 # Note we don't care about the number of deletions necessary 647 648 # Who knows if matched + unmatched will add up? Who cares? 
649 match_proportion = max( 650 matched / len(ref_lines), 651 (len(ref_lines) - unmatched) / len(ref_lines) 652 ) 653 654 # Make our decision 655 if (match_proportion >= sequence_match_threshold): 656 return "full" 657 elif (match_proportion >= partial_sequence_threshold): 658 return "partial" 659 else: 660 return None 661 662 663def very_fuzzy_string_compare( 664 val, 665 ref, 666 line_match_threshold=0.5, 667 sequence_match_threshold=0.8, 668 partial_sequence_threshold=None, 669): 670 """ 671 A checker to be used with OutputTest or ValueTest goals that checks 672 whether the value and the reference value are kinda related. What it 673 does is chops each line of both strings up into words, which consist 674 of one or more alphanumeric characters, and ignores the rest of the 675 line. These ordered tuples of words are made into lists, and the test 676 succeeds if there is some ordered mapping from the word-tuple-list of 677 the value to the word-tuple-list of the reference value that covers 678 at least sequence_match_threshold percent of the items in the 679 reference list, no matter how many items in the value list are 680 unmatched (in other words, there may be a huge amount of extra 681 output, but we don't care). 682 683 The mapping may only map together lines where the difflib sequence 684 similarity is at least the line_match_threshold (but this is after 685 collapsing the tuple to a string with spaces in between, meaning that 686 punctuation is ignored). 687 688 The partial_sequence_threshold serves as the threshold for what 689 percent of the reference sequence must be matched for the result to 690 be a partial success. If not given, it will be set to 1/2 of the 691 sequence_match_threshold. 692 693 Case and blank lines are ignored. 
694 """ 695 696 if partial_sequence_threshold is None: 697 partial_sequence_threshold = sequence_match_threshold / 2 698 699 if not isinstance(val, str) or not isinstance(ref, str): 700 raise TypeError( 701 ( 702 "Can't check string equality for non-string value(s):\n" 703 + "{}\nand\n{}" 704 ).format(repr(val), repr(ref)) 705 ) 706 707 val_folded = val.casefold() 708 ref_folded = ref.casefold() 709 710 val_compressed = compress_whitespace(val_folded) 711 ref_compressed = compress_whitespace(ref_folded) 712 713 if val_folded == ref_folded: # full equality ignoring case 714 return { 715 "status": "accomplished", 716 "explanation": "Strings are the same.<br>\n{}".format( 717 html_tools.html_output_details(val, "Value tested") 718 ) 719 } 720 elif val_compressed == ref_compressed: # equal when compressed 721 return { 722 "status": "accomplished", 723 "explanation": ( 724 "Strings are the same (ignoring most whitespace).<br>\n{}" 725 ).format( 726 html_tools.html_output_details( 727 val_compressed, 728 "Simplified value" 729 ) 730 ) 731 } 732 733 else: # we actually have to check matches 734 val_raw_lines = val.splitlines(keepends=True) 735 ref_raw_lines = ref.splitlines(keepends=True) 736 737 diffTable = html_tools.html_diff_table( 738 val_raw_lines, 739 ref_raw_lines, 740 "Actual output", 741 "Corresponding lines (if any) in expected output" 742 ) 743 744 val_wordlists = [ 745 clean_word_list(line) 746 for line in val_compressed.splitlines() 747 if clean_word_list(line) 748 ] 749 ref_wordlists = [ 750 clean_word_list(line) 751 for line in ref_compressed.splitlines() 752 if clean_word_list(line) 753 ] 754 755 val_wordlines = [' '.join(wl) for wl in val_wordlists] 756 ref_wordlines = [' '.join(wl) for wl in ref_wordlists] 757 758 # Check for a rough match between the sequences... 
759 match_status = rough_sequence_contains( 760 val_wordlines, 761 ref_wordlines, 762 sequence_match_threshold, 763 partial_sequence_threshold, 764 line_match_threshold 765 ) 766 767 if match_status == "full": 768 return { 769 "status": "accomplished", 770 "explanation": ( 771 "Strings are similar enough.<br>\n{}" 772 ).format(diffTable) 773 } 774 elif match_status == "partial": 775 return { 776 "status": "partial", 777 "explanation": ( 778 "Strings are somewhat similar.<br>\n{}" 779 ).format(diffTable) 780 } 781 else: 782 return { 783 "status": "failed", 784 "explanation": ( 785 "Actual value is different from the reference" 786 + " value:<br>\n{}" 787 ).format(diffTable) 788 } 789 790 791def build_contains_re_checker(name, expression, required_matches=1): 792 """ 793 Returns a checker function that tests whether the submitted value 794 contains at least the required number of matches for the given 795 regular expression (a string, pre-compiled expression, or function 796 which will produce an re.Pattern when given a reference string). The 797 name is used to describe the pattern in the explanation returned. 798 """ 799 if isinstance(expression, str): 800 build_regex = lambda ref: re.compile(expression) 801 elif isinstance(expression, re.Pattern): 802 build_regex = lambda ref: expression 803 else: 804 build_regex = expression 805 806 def string_contains_re(val, ref): 807 """ 808 This will be replaced. 809 """ 810 regex = build_regex(ref) 811 if not isinstance(val, str): 812 return { 813 "status": "failed", 814 "explanation": ( 815 "Value is not a string! (was looking for: '{}')" 816 ).format(name) 817 } 818 matches = regex.findall(val) 819 if len(matches) >= required_matches: 820 if required_matches == 1: 821 return { 822 "status": "accomplished", 823 "explanation": "Found {}.".format(name) 824 } 825 else: 826 return { 827 "status": "accomplished", 828 "explanation": ( 829 "Found {} matches (of {} required) for {}." 
830 ).format(len(matches), required_matches, name) 831 } 832 else: 833 if required_matches == 1: 834 return { 835 "status": "failed", 836 "explanation": ("Did not find {}.").format(name) 837 } 838 else: 839 return { 840 "status": "failed", 841 "explanation": ( 842 "Found {} matches (of {} required) for {}." 843 ).format(len(matches), required_matches, name) 844 } 845 846 string_contains_re.__doc__ = """\ 847Returns "accomplished" if the value contains at least {} 848matche(s) for the regular expression: 849 850 {} 851 852In the output, it is referred to as: 853 854 {} 855""".format( 856 required_matches, 857 repr(expression), 858 name 859) 860 return string_contains_re 861 862 863def build_approx_matcher(threshold=0.8, partial_threshold=0.6): 864 """ 865 Returns a checker function that uses a difflib.SequenceMatcher to 866 computer a difference ratio for the given values (should be strings) 867 and compares it to the given threshold. 868 """ 869 def approx_check(val, ref): 870 """ 871 Uses a difflib.SequenceMatcher to compare two strings, and 872 succeeds if their similarity is at least a certain threshold. 873 """ 874 if not isinstance(val, str) or not isinstance(ref, str): 875 raise TypeError("Value and reference value must both be strings.") 876 877 ratio = difflib.SequenceMatcher(None, val, ref).ratio() 878 879 if ratio >= threshold: 880 status = "accomplished" 881 status_desc = "similar enough" 882 if ratio >= 0.9: 883 status_desc = "almost identical" 884 elif ratio == 1: 885 status_desc = "exactly the same" 886 elif ratio >= partial_threshold: 887 status = "partial" 888 status_desc = "somewhat similar" 889 else: 890 status = "failed" 891 status_desc = "not the same" 892 893 # TODO: NOT THIS HERE! 
894 explanation = ( 895 "Values were {}.<br>\n{}" 896 ).format( 897 status_desc, 898 html_tools.html_diff_table( 899 val, 900 ref, 901 joint_title="Values compared:" 902 ) 903 ) 904 905 return { 906 "status": status, 907 "explanation": explanation 908 } 909 910 return approx_check 911 912 913#-------------------------# 914# Distribution comparison # 915#-------------------------# 916 917def result_distributions_have_same_keys(obs_dist, ref_dist): 918 """ 919 Compares two result distribution dictionaries to ensure that they 920 include the same set of keys, regardless of how common those keys 921 are. Each dictionary should contain "trials" and "results" keys, 922 where the "results" key maps different results to integer 923 frequencies. 924 """ 925 926 obs_res = obs_dist["results"] 927 ref_res = ref_dist["results"] 928 929 extra = set(obs_res) - set(ref_res) 930 missing = set(ref_res) - set(obs_res) 931 932 if extra or missing: 933 return { 934 "status": "failed", 935 "explanation": html_tools.build_html_details( 936 "Possibilities are different", 937 "<ul>\n{}\n</ul>".format( 938 '\n'.join( 939 [ 940 ( 941 "<li>Observed result:\n<pre>{}</pre>" 942 + "\nnever occurred in reference" 943 + " distribution.</li>" 944 ).format(ext) 945 for ext in extra 946 ] + [ 947 ( 948 "<li>Expected result:\n<pre>{}</pre>" 949 + "\nnever occurred in observed" 950 + " distribution.</li>" 951 ).format(mis) 952 for mis in missing 953 ] 954 ) 955 ) 956 ) 957 } 958 else: 959 return { 960 "status": "accomplished", 961 "explanation": ( 962 "Observed distribution has the same set of possibilities" 963 + " as the reference distribution." 964 ) 965 } 966 967 968def build_distribution_comparator(precision=2): 969 """ 970 Returns a comparator function for result distributions, where 971 fractions of the result in each distribution category are expected to 972 agree after rounding to the given number of decimal places. 
973 """ 974 def result_distributions_are_similar(obs_dist, ref_dist): 975 """ 976 Compares two result distribution dictionaries for statistical 977 similarity. Each dictionary should contain "trials" and "results" 978 keys, where the "results" key maps different results to integer 979 frequencies. 980 981 # TODO: Better statistical tests here... 982 """ 983 984 obs_res = obs_dist["results"] 985 ref_res = ref_dist["results"] 986 987 extra = set(obs_res) - set(ref_res) 988 missing = set(ref_res) - set(obs_res) 989 990 if extra or missing: 991 return { 992 "status": "failed", 993 "explanation": html_tools.build_html_details( 994 "Distributions are different", 995 "<ul>\n{}\n</ul>".format( 996 '\n'.join( 997 [ 998 ( 999 "<li>Observed result:\n<pre>{}</pre>" 1000 + "\nnever occurred in reference" 1001 + " distribution.</li>" 1002 ).format(ext) 1003 for ext in extra 1004 ] + [ 1005 ( 1006 "<li>Expected result:\n<pre>{}</pre>" 1007 + "\nnever occurred in observed" 1008 + " distribution.</li>" 1009 ).format(mis) 1010 for mis in missing 1011 ] 1012 ) 1013 ) 1014 ) 1015 } 1016 1017 disagree = [] 1018 for key in sorted(obs_res): 1019 obs_fr = round(obs_res[key] / obs_dist["trials"], precision) 1020 ref_fr = round(ref_res[key] / ref_dist["trials"], precision) 1021 if obs_fr != ref_fr: 1022 disagree.append((key, obs_fr, ref_fr)) 1023 1024 if disagree: 1025 return { 1026 "status": "failed", 1027 "explanation": html_tools.build_html_details( 1028 "Distributions were noticeably different", 1029 "<ul>\n{}\n</ul>".format( 1030 '\n'.join( 1031 ( 1032 "<li>Result\n<pre>{}</pre>\n occurred" 1033 + " about {}% of the time in the reference" 1034 + " distribution but was observed about" 1035 + " {}% of the time.</li>" 1036 ).format( 1037 key, 1038 100 * ref_fr, 1039 100 * obs_fr 1040 ) 1041 for (key, obs_fr, ref_fr) in disagree 1042 ) 1043 ) 1044 ) 1045 } 1046 else: 1047 return { 1048 "status": "accomplished", 1049 "explanation": ( 1050 "Distributions agreed to the required 
precision:<br>\n" 1051 + str(ref_res) + " (ref)<br>\n" 1052 + str(obs_res) + " (obs)" 1053 ) 1054 } 1055 1056 return result_distributions_are_similar 1057 1058 1059#-----------------------------------# 1060# Sequence and structure comparison # 1061#-----------------------------------# 1062 1063_STUB_N = 0 1064_STUB_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 1065_STUB_LEN = len(_STUB_CHARS) 1066 1067 1068def reset_stubs(): 1069 """ 1070 Resets the stub counter. Useful to ensure that equivalent data 1071 structures are equivalent when made hashable via equivalent stubs, 1072 although order really matters (i.e., recursive dictionaries with 1073 different key orders will not necessarily end up with the same 1074 stubs). 1075 """ 1076 global _STUB_N 1077 _STUB_N = 0 1078 1079 1080def next_stub(): 1081 """ 1082 Generates a unique string to indicate the presence of a recursive 1083 structure encountered during make_hashable. 1084 """ 1085 global _STUB_N 1086 n = _STUB_N 1087 _STUB_N += 1 1088 result = "" 1089 while n // _STUB_LEN > 0: 1090 result = _STUB_CHARS[n % _STUB_LEN] + result 1091 n //= _STUB_LEN 1092 result = _STUB_CHARS[n % _STUB_LEN] + result 1093 return "R&" + result 1094 1095 1096def make_hashable(dataStructure, memo=None): 1097 """ 1098 Takes a data structure that may contain lists, tuples, dictionaries, 1099 and/or sets as well as strings, numbers, booleans, and Nones, and 1100 turns it into a similar structure using tuples in place of lists, 1101 sets, and dictionaries that can be hashed. Uses a ID-based memo to 1102 avoid re-processing recursive structures, with string stubs in place 1103 of already-visited objects. 
1104 """ 1105 if memo is None: 1106 reset_stubs() 1107 memo = {} 1108 1109 # If this is a re-visit to a container object, return a stub string 1110 if ( 1111 not isinstance(dataStructure, (int, float, str, bool)) 1112 and id(dataStructure) in memo 1113 ): 1114 return memo[id(dataStructure)] 1115 1116 # Save a stub string in case of a future re-visit 1117 memo[id(dataStructure)] = next_stub() 1118 1119 if isinstance(dataStructure, (list, tuple)): 1120 return tuple([make_hashable(x, memo) for x in dataStructure]) 1121 elif isinstance(dataStructure, set): 1122 # We sort sets so that ordering is irrelevant 1123 return tuple([make_hashable(x, memo) for x in sorted(dataStructure)]) 1124 elif isinstance(dataStructure, dict): 1125 # We sort keys so that ordering is irrelevant 1126 return tuple( 1127 (key, make_hashable(dataStructure[key], memo)) 1128 for key in sorted(dataStructure) 1129 ) 1130 else: 1131 try: 1132 _ = hash(dataStructure) 1133 except TypeError as e: 1134 if "unhashable" in str(e): 1135 raise TypeError( 1136 ( 1137 "Could not convert object of type {} to hashable" 1138 + " equivalent:\n{}" 1139 ).format(type(dataStructure), repr(dataStructure)) 1140 ) 1141 else: 1142 raise 1143 1144 # If we get here it's fully hashable already 1145 return dataStructure 1146 1147 1148def longest_common_subsequence( 1149 seq1, 1150 seq2, 1151 start1=None, end1=None, 1152 start2=None, end2=None, 1153 matcher=lambda x, y: x == y, 1154 memo=None 1155): 1156 """ 1157 Finds the longest common subsequence between the continuous 1158 subsequences of the two given sequences which start and end at the 1159 given start/end points for each sequence. Uses the memo (which should 1160 never be provided explicitly) to remember solutions to sub-problems 1161 and greatly speed up the results. 1162 1163 The custom matcher function determines what counts as a match. 
def longest_common_subsequence(
    seq1,
    seq2,
    start1=None, end1=None,
    start2=None, end2=None,
    matcher=lambda x, y: x == y,
    memo=None
):
    """
    Computes the longest common subsequence between seq1[start1:end1]
    and seq2[start2:end2] (whole sequences by default), returned as a
    tuple of (seq1-index, seq2-index) pairs, one pair per matched item.
    The memo parameter is managed internally (never pass it explicitly);
    it caches sub-problem solutions to greatly speed things up.

    Matching is decided by the custom matcher function. If it returns a
    dictionary with a 'status' key, two items count as matching only
    when that status is exactly "accomplished"; otherwise any truthy
    return value counts as a match.

    When several common subsequences tie for longest, the one built from
    the earliest items of seq1 is the one returned.
    """
    # Fill in defaults: the full range of each sequence
    if start1 is None:
        start1 = 0
    if end1 is None:
        end1 = len(seq1)
    if start2 is None:
        start2 = 0
    if end2 is None:
        end2 = len(seq2)

    if memo is None:  # top-level (non-recursive) call
        memo = {}

    key = (start1, end1, start2, end2)
    if key in memo:  # sub-problem already solved
        return memo[key]

    if start1 == end1 or start2 == end2:
        # Base case: an empty range on either side matches nothing
        best = ()
    else:
        # Decide whether the two front items match (use it or lose it)
        verdict = matcher(seq1[start1], seq2[start2])
        # TODO: Some kind of handling for partial matches here?!?
        if isinstance(verdict, dict) and 'status' in verdict:
            verdict = verdict['status'] == 'accomplished'

        if verdict:
            # Front items match: pair them up, then solve the remainder
            rest = longest_common_subsequence(
                seq1,
                seq2,
                start1 + 1, end1,
                start2 + 1, end2,
                matcher,
                memo
            )
            best = ((start1, start2),) + rest
        else:
            # Front items differ, so at least one of them is not part
            # of the answer: try skipping each one in turn
            without_first1 = longest_common_subsequence(
                seq1,
                seq2,
                start1 + 1, end1,
                start2, end2,
                matcher,
                memo
            )
            without_first2 = longest_common_subsequence(
                seq1,
                seq2,
                start1, end1,
                start2 + 1, end2,
                matcher,
                memo
            )
            if len(without_first2) > len(without_first1):
                best = without_first2
            else:
                # A tie is biased towards earlier items from sequence 1
                best = without_first1

    memo[key] = best
    return best


def diff_blocks(seq1, seq2, matcher=lambda x, y: x == y):
    """
    Produces a list of [seq1-index, seq2-index, length] triples in the
    same format as difflib.SequenceMatcher's get_matching_blocks output:
    each triple states that the two sequences agree for `length` items
    starting at the given pair of indices.

    Unlike difflib, an optional custom matcher decides item equality.
    It may return any truthy value to indicate a match, or it may be a
    checker-style function returning a dictionary whose 'status' key
    equals 'accomplished' to signal a match.
    """
    pairs = longest_common_subsequence(seq1, seq2, matcher=matcher)

    blocks = []
    for i1, i2 in pairs:
        if (
            blocks
            and i1 == blocks[-1][0] + blocks[-1][2]
            and i2 == blocks[-1][1] + blocks[-1][2]
        ):
            # This match continues the current block
            blocks[-1][2] += 1
        else:
            # This match starts a new block
            blocks.append([i1, i2, 1])

    return blocks
def correspondence_helper(
    pool1,
    pool2,
    indexset1,
    indexset2,
    lower_bound=0,
    matcher=lambda x, y: x == y,
    memo=None
):
    """
    Helper for `best_correspondence` that also takes frozen sets of
    live indices for each pool so that memoization can be applied.

    The lower_bound argument allows it to abort early (returning None)
    if it figures out that the upper bound for its match is smaller
    than that value. Aborted (None) results are never memoized.

    Returns a pool1-index -> pool2-index mapping.

    TODO: This function seems dangerously exponential in both time and
    memory... Can we do better? Should we do something else?
    """
    if memo is None:
        memo = {}

    if min(len(indexset1), len(indexset2)) < lower_bound:
        return None
        # special value indicating an aborted call; we don't memoize this.

    # Look up a result if we've got one
    memo_key = (indexset1, indexset2)
    if memo_key in memo:
        return memo[memo_key]

    # If either is exhausted, the result is an empty mapping:
    if len(indexset1) == 0 or len(indexset2) == 0:
        result = {}
        memo[memo_key] = result
        return result

    # Find all possible matches of the first available index in the
    # first pool:
    all_matches = []
    first_p1_index = next(indexset1.__iter__())
    first_p1_item = pool1[first_p1_index]
    for p2_index in indexset2:
        if matcher(first_p1_item, pool2[p2_index]):  # it's a match
            all_matches.append(p2_index)

    # Now recurse for each possible match:
    best_so_far = None
    for p2_index in all_matches:
        sub_result = correspondence_helper(
            pool1,
            pool2,
            indexset1 - { first_p1_index },
            indexset2 - { p2_index },
            lower_bound=max(0, lower_bound - 1),
            # -1 because of the extra match we'll add to this result
            matcher=matcher,
            memo=memo
        )

        # FIX: check for an aborted (None) sub-result BEFORE building on
        # it. Previously full_result.update(sub_result) ran first, which
        # would raise a TypeError whenever the recursive call aborted.
        if sub_result is None:
            continue

        # Add the match that we're assuming to this result
        full_result = { first_p1_index: p2_index }
        full_result.update(sub_result)

        # Remember the best result so far
        if best_so_far is None or len(full_result) > len(best_so_far):
            best_so_far = full_result
            # Move our lower bound up if it can go up
            lower_bound = max(lower_bound, len(best_so_far))

    # One more possibility: that we find a better result by leaving
    # first_p1_index unmatched entirely (or it can't match)
    unmatch_result = correspondence_helper(
        pool1,
        pool2,
        indexset1 - { first_p1_index },
        indexset2,
        lower_bound=lower_bound,  # same as ours
        matcher=matcher,
        memo=memo
    )

    # Update best_so_far (the unmatch branch may also have aborted):
    if (
        unmatch_result is not None
        and (best_so_far is None or len(unmatch_result) > len(best_so_far))
    ):
        best_so_far = unmatch_result
        # No need to update lower bound, as we're pretty much done
        # already

    # Turn any possible remaining None result into an empty dictionary
    if best_so_far is None:
        best_so_far = {}

    # Now we just return the best option that we found.
    memo[memo_key] = best_so_far
    return best_so_far


def best_correspondence(pool1, pool2, matcher=lambda x, y: x == y):
    """
    Finds the best pairing of elements from the two given pools (which
    aren't necessarily the same size) such that each pair satisfies the
    given matcher function (default is the items must be equal) and the
    total number of pairs is maximized. The order of items in each pool
    is irrelevant, but the pools must be indexable, because this function
    returns a dictionary that maps indices in pool1 to indices in pool2.
    """
    return correspondence_helper(
        pool1,
        pool2,
        frozenset(range(len(pool1))),
        frozenset(range(len(pool2))),
        matcher=matcher
    )
def sequence_diff(
    val,
    ref,
    order_matters=False,
    item_matcher=lambda x, y: x == y
):
    """
    Returns a tuple of three lists: a list of matching items, a list of
    missing items that are present in the reference sequence but not the
    value sequence, and a list of extra items that are present in the
    value sequence but not the reference sequence. If order_matters is
    given as True, some items may be listed as both "missing" and "extra"
    if they appear out-of-order in the value sequence.

    The item_matcher is used to determine what counts as a match: if it
    returns a truthy value, two items will be matched. However, it may
    also be a checker-style function that returns a dictionary with a
    'status' key, in which case a status value of 'accomplished' will
    count as a match and any other value will not.

    TODO: Actually respect "partial" as an item_matcher result!
    """
    # TODO: NOT THIS HACK!
    # When order doesn't matter but both sequences can be sorted, sort
    # them and run the (much cheaper) ordered diff instead of the
    # exponential unordered-correspondence search below.
    if order_matters is False:
        try:
            val = sorted(val)
            ref = sorted(ref)
            order_matters = True
        except Exception:
            pass

    vhashables = [make_hashable(x) for x in val]
    rhashables = [make_hashable(x) for x in ref]

    if order_matters:
        matching, missing, extra = [], [], []

        blocks = diff_blocks(vhashables, rhashables, matcher=item_matcher)

        # Convert blocks into matching/missing/extra format: items inside
        # a block match; items between blocks are missing (from ref) or
        # extra (from val).
        last_vidx = 0
        last_ridx = 0
        for vidx, ridx, blen in blocks:
            matching.extend(val[idx] for idx in range(vidx, vidx + blen))
            missing.extend(ref[idx] for idx in range(last_ridx, ridx))
            extra.extend(val[idx] for idx in range(last_vidx, vidx))

            last_vidx = vidx + blen
            last_ridx = ridx + blen

        # Catch trailing missing/extra values (blocks might be empty)
        if last_vidx < len(val):
            extra.extend(val[idx] for idx in range(last_vidx, len(val)))
        if last_ridx < len(ref):
            missing.extend(ref[idx] for idx in range(last_ridx, len(ref)))

    else:
        # Find the best unordered correspondence between the hashable
        # versions of the values and reference items.

        # TODO: Safety valve for values that are much longer than
        # expected?
        correspondence = best_correspondence(
            vhashables,
            rhashables,
            matcher=item_matcher
        )
        matched_indices = set(correspondence.values())

        # PERF FIX: materialize each sequence once, instead of rebuilding
        # list(val)/list(ref) on every comprehension iteration (which was
        # accidentally quadratic). Ordering within each list is preserved.
        val_list = list(val)
        ref_list = list(ref)

        matching = [
            val_list[idx]
            for idx in correspondence
        ]
        missing = [
            ref_list[idx]
            for idx in range(len(ref))
            if idx not in matched_indices
        ]
        extra = [
            val_list[idx]
            for idx in range(len(val))
            if idx not in correspondence
        ]

    return matching, missing, extra
def make_structure_comparator(
    tolerance="auto",
    order_matters=False,
    item_matcher=lambda x, y: x == y,
    item_repr=html_tools.dynamic_html_repr,
    key_repr=None,
    include_full_results=True
):
    """
    Creates a comparator that looks at two data structures (composed of
    lists, tuples, dictionaries, and/or sets) and reports on their
    differences. If the tolerance is left at the default (the string
    'auto'), a partial success will be returned if only a few items
    are different at the top level of the given data structure. It may be
    given as 0, in which case only full success or failure will result.
    If it is a number other than 0, that many total missing + extra items
    will be tolerated; if it is not a number or the string "auto", it
    must be a function which will be given the reference value being
    compared to and which should return a number of different items to be
    tolerated.

    If the order of the given sequence matters, set order_matters to
    True (only apply this to sets and dictionaries if you expect them to
    be ordered).

    A custom item_matcher may be provided, in which case it will be
    applied to pairs of items from the two lists, tuples, or sets being
    compared to decide which are equal. Normally, any truthy value will
    count as a match, but if the return value from the item_matcher is a
    dictionary which contains a 'status' key, the match will only be
    counted if the status value is 'accomplished', which allows the use
    of checker-style functions as item matchers.

    TODO: Would be nice to support partial matching at the item level,
    but that's hard!

    When an item_matcher is provided for comparing dictionaries, it will
    be applied to key/value pairs (i.e., it will be given two arguments
    which are both tuples, not four arguments). Sometimes the values will
    be None during the first phase when only keys are being compared.

    A custom item_repr function may be provided to be used to generate
    item representations in explanations where lists of
    missing/extra/moved items appear. A custom key_repr may also be
    provided for representing dictionary keys; if left out it defaults to
    the same thing as item_repr (which itself defaults to
    `html_tools.dynamic_html_repr`).

    Both of these custom repr functions should be able to accept one
    argument: the thing to be represented.

    If attempts to compare or list full results are not desired,
    include_full_results may be given as False.
    """
    # Normalize tolerance into a function of the reference value
    if tolerance == "auto":
        # Auto: scale the tolerated differences with the ref's size
        tolerance_fcn = lambda ref: min(len(ref) // 5, 3 + len(ref) // 50)
    elif isinstance(tolerance, (int, float)):
        # A fixed number of tolerated differences
        tolerance_fcn = lambda ref: tolerance
    else:
        # Assume it's already a function of the reference value
        tolerance_fcn = tolerance

    if key_repr is None:
        key_repr = item_repr

    def compare_structures(val, ref):
        """
        Comparator for structured data (lists, tuples, sets, and/or
        dictionaries) according to the arguments given to
        make_structure_comparator. Returns a status/explanation
        dictionary; raises a TypeError if the reference value is not a
        list, tuple, set, or dictionary.
        """
        if not isinstance(ref, (list, tuple, set, dict)):
            raise TypeError(
                "Reference value for compare_structures must"
                + " be a list, tuple, set, or dictionary."
            )
        elif type(val) != type(ref):
            # Mismatched container types fail immediately
            return {
                "status": "failed",
                "explanation": (
                    "Your result was a {} but should have been a {}."
                ).format(type(val).__name__, type(ref).__name__)
            }

        # Define our tolerance value
        tolerance = tolerance_fcn(ref)

        if isinstance(val, dict):
            # Dictionaries: first compare key sets, then (only if the
            # keys are all correct) compare the key/value items.
            kmatch, kmiss, kext = sequence_diff(
                val.keys(),
                ref.keys(),
                # order doesn't matter for dict keys...
                # TODO: What if it's a subclass though...?
                item_matcher=lambda k1, k2: (
                    item_matcher((k1, None), (k2, None))
                )
            )
            wrong = len(kmiss) + len(kext)
            if wrong > 0:
                # Key sets differ: report failure or partial success
                # depending on the tolerance, then return without
                # comparing values.
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the wrong keys (out of {},"
                        + " {} matched).<br>\n"
                    ).format(
                        phrasing.obj_num(len(ref), "expected key"),
                        len(kmatch)
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result has some incorrect keys (out of {},"
                        + " {} matched).<br>\n"
                    ).format(
                        phrasing.obj_num(len(ref), "expected key"),
                        len(kmatch)
                    )
                if kmiss:
                    expl += html_tools.build_html_details(
                        "Missing keys:",
                        html_tools.build_list(key_repr(x) for x in kmiss)
                    )
                if kext:
                    expl += html_tools.build_html_details(
                        "Extra keys:",
                        html_tools.build_list(key_repr(x) for x in kext)
                    )

                if include_full_results:
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )
                return { "status": status, "explanation": expl }

            # otherwise, we can check values

            imatch, imiss, iext = sequence_diff(
                list(val.items()),
                list(ref.items()),
                order_matters=order_matters,
                item_matcher=item_matcher
            )

            if order_matters:
                # Find items that moved: items that show up as both
                # missing and extra just appear out-of-order.
                # NOTE(review): `miss in iext` uses == for membership
                # rather than item_matcher, unlike the non-dict branch
                # below — confirm that's intended when a custom matcher
                # is supplied.
                imoved = []
                imoved_missing = []
                imoved_extra = []
                for midx, miss in enumerate(imiss):
                    if miss in iext:
                        eidx = iext.index(miss)
                        imoved_missing.append(midx)
                        imoved_extra.append(eidx)
                        imoved.append(miss)

                # Drop moved items from the missing/extra lists
                imiss = [
                    imiss[idx]
                    for idx in range(len(imiss))
                    if idx not in imoved_missing
                ]

                iext = [
                    iext[idx]
                    for idx in range(len(iext))
                    if idx not in imoved_extra
                ]

            else: # order doesn't matter
                imoved = []

            wrong = len(imiss) + len(iext) + len(imoved)
            if wrong > 0:
                # Values differ: failed or partial based on tolerance
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the right keys but some values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        len(imatch)
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result has the right keys but a few values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        len(imatch)
                    )
                if imiss:
                    expl += html_tools.build_html_details(
                        "Missing values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in imiss
                        )
                    )
                if iext:
                    expl += html_tools.build_html_details(
                        "Extra values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in iext
                        )
                    )
                if imoved:
                    expl += html_tools.build_html_details(
                        "Out-of-order values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in imoved
                        )
                    )
                if include_full_results:
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )

                return { "status": status, "explanation": expl }
            else:
                # Everything matched
                if include_full_results:
                    results = "<br>\n" + html_tools.build_html_details(
                        "Your result:",
                        html_tools.dynamic_html_repr(val)
                    )
                else:
                    results = ""

                return {
                    "status": "accomplished",
                    "explanation": (
                        "Your result has the correct keys and values ({}"
                        + " matched).{}"
                    ).format(
                        phrasing.obj_num(len(imatch), "item"),
                        results
                    )
                }

        else: # anything other than a dictionary
            matching, missing, extra = sequence_diff(
                val,
                ref,
                order_matters=order_matters,
                item_matcher=item_matcher
            )

            if order_matters:
                # Find items that moved: a missing item that matches
                # some extra item is really just out-of-order.
                moved = []
                moved_missing = []
                moved_extra = []
                for midx, miss in enumerate(missing):
                    for eidx, xtra in enumerate(extra):
                        if item_matcher(miss, xtra):
                            # TODO: What if multiple copies are out-of-order?
                            moved_missing.append(midx)
                            moved_extra.append(eidx)
                            moved.append(miss)
                            break

                # Drop moved items from the missing/extra lists
                new_missing = [
                    missing[idx]
                    for idx in range(len(missing))
                    if idx not in moved_missing
                ]
                missing = new_missing

                new_extra = [
                    extra[idx]
                    for idx in range(len(extra))
                    if idx not in moved_extra
                ]
                extra = new_extra

            else: # order doesn't matter
                moved = []

            wrong = len(missing) + len(extra) + len(moved)
            if wrong > 0:
                # Some items differ: failed or partial based on tolerance
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the wrong values (out of {},"
                        + " {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        phrasing.obj_num(len(matching), "value")
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result is mostly correct but some values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        phrasing.obj_num(len(matching), "value")
                    )
                # TODO: better management of small differences in large
                # structures here!
                if missing:
                    expl += html_tools.build_html_details(
                        "Missing values:",
                        html_tools.build_list(
                            item_repr(x) for x in missing
                        )
                    )
                if extra:
                    expl += html_tools.build_html_details(
                        "Extra values:",
                        html_tools.build_list(
                            item_repr(x) for x in extra
                        )
                    )
                if moved:
                    expl += html_tools.build_html_details(
                        "Out-of-order values:",
                        html_tools.build_list(
                            item_repr(x) for x in moved
                        )
                    )
                if include_full_results:
                    # TODO: We'd like scrollable full-content diffs here,
                    # but... that's a Project.
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )
                return { "status": status, "explanation": expl }
            else:
                # Everything matched
                if include_full_results:
                    results = "<br>\n" + html_tools.build_html_details(
                        "Your result:",
                        html_tools.dynamic_html_repr(val)
                    )
                else:
                    results = ""
                return {
                    "status": "accomplished",
                    "explanation": (
                        "Your result has the correct values ({}"
                        + " matched).{}"
                    ).format(
                        phrasing.obj_num(len(matching), "value"),
                        results
                    )
                }

    return compare_structures
def diff_anim_frames(val, ref, steps=10):
    """
    Builds a list of PIL.Image objects forming an animation that fades
    linearly from `val` to `ref`. `steps` controls how many intermediate
    frames are produced, so the result contains steps + 2 frames in
    total (the two originals bookend the fade).
    """
    import PIL.Image  # we import here and let a potential error bubble out
    # TODO: Draw label text on anim frames!

    frames = [val]
    frames.extend(
        PIL.Image.blend(val, ref, step / (steps + 1))
        for step in range(1, steps + 1)
    )
    frames.append(ref)
    return frames
def make_image_comparator(
    allowed_differences=0.03,
    partial_allowed=0.5,
    similarity_threshold=15
):
    """
    Returns a comparison function suitable for use with 'image' context
    slots. It checks pixels in the two images (which it checks are
    actually images and are the same size) and returns a status of
    accomplished if the fraction of the area of the image in which pixels
    differ is less than or equal to the given allowed fraction. Pixels
    which have a 255-basis-RGB-space Euclidean distance (TODO: better
    distance metric) below the given similarity threshold are counted as
    1/2 of a pixel difference instead of a full pixel difference. (TODO:
    Better controls for this?). A partially-complete result is returned
    as long as the fractional area of different pixels is no more than
    the given partial_allowed value.
    """
    import PIL.Image  # we import here and let a potential error bubble out

    def compare_images(val, ref):
        """
        Compares two PILlow images, ensuring that the total percentage of
        pixels which differ between the two images is below a threshold.
        Pixels which differ only slightly will count as 1/2 a pixel in
        terms of the area difference computation. Images of different
        sizes will always be treated as totally different (TODO: More
        nuance there?).

        Raises a TypeError if the reference value isn't a PIL image;
        returns a failed status (rather than raising) when the submitted
        value isn't one.
        """
        if not isinstance(ref, PIL.Image.Image):
            raise TypeError(
                f"Reference image capture failed (got a {type(ref)},"
                f" not a PIL.Image)"
            )
        if not isinstance(val, PIL.Image.Image):
            return {
                "status": "failed",
                "explanation": (
                    f"Image capture failed (got a {type(val)}, not an"
                    f" image)."
                )
            }

        if val.size != ref.size:
            # Size mismatch counts as totally different
            return {
                "status": "failed",
                "explanation": (
                    f"Your image ({val.width}x{val.height}) did not"
                    f" have the same size as the solution image"
                    f" ({ref.width}x{ref.height})."
                )
            }

        # Should be 0-255 RGB data
        # TODO: Verify mode
        # NOTE(review): assumes each pixel is an indexable with at least
        # 3 channels (e.g. RGB/RGBA); single-band ("L") images would
        # break the distance computation below — TODO confirm callers
        # always supply RGB images.
        val_pixels = val.getdata()
        ref_pixels = ref.getdata()
        diff_area = 0
        half_diff_area = 0
        for index in range(len(val_pixels)):
            px = val_pixels[index]
            ref_px = ref_pixels[index]
            # Euclidean distance over the first three channels
            dist = sum((px[i] - ref_px[i])**2 for i in range(3))**0.5
            if 0 < dist <= similarity_threshold:
                half_diff_area += 1
            elif similarity_threshold < dist:
                diff_area += 1

        # Build an HTML image comparison
        comparison = html_tools.build_html_tabs(
            [
                # TODO: Proper alt text here!
                (
                    "Your image:",
                    html_tools.html_image(val, "Your image")
                ),
                (
                    "Solution image:",
                    html_tools.html_image(ref, "Solution image")
                ),
                (
                    "Animation:",
                    html_tools.html_animation(
                        diff_anim_frames(val, ref, 10),
                        (
                            "An animation between your image and the"
                            " solution image."
                        ),
                        # Hold the endpoints longer than the fade frames
                        delays=[500] + [100] * 10 + [500]
                    )
                )
            ]
        )

        # Return a status based on the difference fraction
        # (half-different pixels count for half their area)
        diff_fraction = diff_area / len(val_pixels)
        half_diff_fraction = half_diff_area / len(val_pixels)
        diff_score = diff_fraction + half_diff_fraction / 2

        # round_to_sig_figs is presumably a helper defined elsewhere in
        # this module — TODO confirm.
        diff_pct = round_to_sig_figs(diff_fraction * 100, 2)
        diff_msg = (
            f"{diff_pct}% of pixels were"
            f" different"
        )
        if half_diff_fraction > 0:
            half_pct = round_to_sig_figs(half_diff_fraction * 100, 2)
            diff_msg += (
                f" and {half_pct}% of pixels were"
                f" somewhat different"
            )

        if diff_score == 0:
            return {
                "status": "accomplished",
                "explanation": (
                    f"Your image and the solution image were exactly the"
                    f" same: {comparison}"
                )
            }
        elif diff_score <= allowed_differences:
            return {
                "status": "accomplished",
                "explanation": (
                    f"Your image and the solution image were almost the"
                    f" same ({diff_msg}): {comparison}"
                )
            }
        elif diff_score <= partial_allowed:
            return {
                "status": "partial",
                "explanation": (
                    f"Your image and the solution image were similar,"
                    f" but not the same ({diff_msg}): {comparison}"
                )
            }
        else:
            return {
                "status": "failed",
                "explanation": (
                    f"Your image and the solution image were not the"
                    f" same ({diff_msg}): {comparison}"
                )
            }

    return compare_images
LONG_STRING_LINES = 3
"""
Threshold in terms of newlines before we treat a string as long.
"""

LONG_STRING_LEN = 120
"""
Threshold in terms of raw characters even without any newlines before we
treat a string as long.
"""


def omni_compare(val, ref, memo=None):
    """
    A one-size-kinda-fits-all comparison method (although note that it
    assumes order matters in lists and tuples).

    Tries its best to give a concise-ish description of differences when
    they exist.

    Normally returns a status/explanation dictionary. The memo parameter
    is managed internally (it tracks in-progress comparisons by object
    id so recursive structures don't cause infinite loops) and should
    not be supplied by external callers.
    """
    if memo is None:
        memo = {}

    vid = id(val)
    rid = id(ref)
    # NOTE(review): this returns a bare True instead of a status
    # dictionary. It is only reachable on recursive self-comparisons,
    # where the value is consumed as an item_matcher result (any truthy
    # value counts), but it does break this function's usual contract —
    # confirm no direct caller can hit it.
    if vid in memo and rid in memo[vid]:
        return True
        # Recursive comparison that's already in-progress

    # Mark this pair as in-progress before recursing
    memo.setdefault(vid, set()).add(rid)

    # TODO: Better/safer here!
    try:
        matched = val == ref
    except RecursionError:
        matched = False # might be, but we'll have to check further

    if matched:
        return {
            "status": "accomplished",
            "explanation": (
                f"Actual value is the same as the reference"
                f" value.<br>\n{html_tools.dynamic_html_repr(val)}"
            )
        }
    else: # let's hunt for differences
        if isinstance(val, bool) and isinstance(ref, bool):
            # Bools are isinstance of int so we need this case before the
            # one below
            return {
                "status": "failed",
                "explanation": (
                    f"Your result ({val}) was the opposite of the"
                    f" expected result ({ref})."
                )
            }
        elif (
            isinstance(val, (int, float, complex))
            and isinstance(ref, (int, float, complex))
        ): # what if they're both numbers?
            # numbers_are_equalish is presumably a tolerance-based
            # comparator defined earlier in this module — TODO confirm.
            ncomp = numbers_are_equalish(val, ref)
            if ncomp["status"] == "accomplished": # close-enough numbers
                return ncomp
            else: # not close-enough
                return {
                    "status": "failed",
                    "explanation": (
                        f"Your result ({val}) and the expected"
                        f" result ({ref}) are consequentially"
                        f" different numbers."
                    )
                }
        elif type(val) != type(ref): # different types; not both numbers
            vt = html_tools.escape(str(type(val).__name__))
            rt = html_tools.escape(str(type(ref).__name__))
            return {
                "status": "failed",
                "explanation": (
                    f"Your result was {phrasing.a_an(vt)},"
                    f" but the correct result was"
                    f" {phrasing.a_an(rt)}."
                )
            }
        elif isinstance(val, str): # both strings
            val_lines = val.splitlines()
            ref_lines = ref.splitlines()
            if (
                max(len(val_lines), len(ref_lines)) >= LONG_STRING_LINES
                or max(len(val), len(ref)) >= LONG_STRING_LEN
            ): # longish strings so there should be some tolerance
                # very_fuzzy_string_compare is presumably defined earlier
                # in this module — TODO confirm.
                return very_fuzzy_string_compare(
                    val,
                    ref,
                    line_match_threshold=1.0,
                    sequence_match_threshold=1.0,
                    partial_sequence_threshold=0.5
                ) # partial success only unless equal
            else: # shorter strings so very little tolerance
                close = strings_are_equal_modulo_most_whitespace(val, ref)
                diff_table = html_tools.html_diff_table(
                    val_lines,
                    ref_lines,
                    "Actual output",
                    "Corresponding lines (if any) in expected output"
                )
                if close["status"] == "accomplished":
                    # Whitespace-only differences earn partial credit
                    return {
                        "status": "partial",
                        "explanation": (
                            f"Strings aren't the same, but they're"
                            f" close:<br>\n{diff_table}"
                        )
                    }
                else:
                    return {
                        "status": "failed",
                        "explanation": (
                            f"Actual value is different from the"
                            f" reference value:<br>\n{diff_table}"
                        )
                    }

        elif isinstance(val, (list, tuple)): # same-type sequences
            # TODO: Less inefficient here!
            # TODO: More detailed explication of deep differences here?
            return make_structure_comparator(
                order_matters=True,
                item_matcher=lambda val, ref: omni_compare(val, ref, memo)
            )(val, ref)
        elif isinstance(val, (set, dict)): # both sets or dicts
            # TODO: Less inefficient here!
            # TODO: More detailed explication of deep differences here?
            # NOTE(review): order_matters=True looks suspicious for sets
            # (which are unordered) and debatable for dicts — confirm
            # whether this branch should pass order_matters=False.
            return make_structure_comparator(
                order_matters=True,
                item_matcher=lambda val, ref: omni_compare(val, ref, memo)
            )(val, ref)
        else: # not sure what kind of thing this is...
            return {
                "status": "failed",
                "explanation": (
                    f"Your value ({html_tools.dynamic_html_repr(val)})"
                    f" and the reference value"
                    f" ({html_tools.dynamic_html_repr(ref)}) are not"
                    f" the same."
                )
            }
def report_unequal(val, ref):
    """
    Builds a status/explanation dictionary stating that `val` and `ref`
    differ. The embedded difference report takes different forms
    depending on the two values: a line-by-line diff table for string
    pairs, a short inline comparison for values with small reprs, and
    dynamic HTML representations otherwise.
    """
    if isinstance(val, str) and isinstance(ref, str):
        # Strings get a line-by-line diff table
        diff = html_tools.html_diff_table(
            val.splitlines(keepends=True),
            ref.splitlines(keepends=True),
            'Actual value',
            'Corresponding lines (if any) in expected value'
        )
    elif max(len(repr(val)), len(repr(ref))) < 50:
        # Both reprs are short enough to show inline
        diff = "Actual: {}<br>\nExpected: {}".format(
            repr(val),
            repr(ref)
        )
    else:
        # Larger values get collapsible dynamic representations
        diff = "Actual:<br>\n{}<br><br>\nExpected:<br>\n{}".format(
            html_tools.dynamic_html_repr(val),
            html_tools.dynamic_html_repr(ref)
        )

    explanation = (
        "Actual value is different from the reference"
        " value:<br>\n{}"
    ).format(diff)
    return {"status": "failed", "explanation": explanation}
Returns a status/explanation dictionary reporting that two values are unequal, including a report on their differences, which has different forms depending on the values themselves.
def report_equal(val, ref):
    """
    Returns a status/explanation dictionary reporting that two values
    are equivalent.
    """
    # Multi-line strings are shown as triple-quoted literals so their
    # line structure stays visible; everything else uses repr.
    if isinstance(val, str) and '\n' in val:
        shown = "'''\\\n{}'''".format(val)
    else:
        shown = repr(val)

    details = html_tools.html_output_details(
        shown,  # TODO: Pretty printing here
        "Value tested"
    )
    return {
        "status": "accomplished",
        "explanation": (
            "Actual value is the same as the reference value:<br>\n{}"
        ).format(details)
    }
Returns a status/explanation dictionary reporting that two values are equivalent.
def strict_equality_checker(val, ref, memo=None):
    """
    A checker to be used with OutputTest or ValueTest goals that ensures
    the output or value being checked is strictly equal to the reference
    output or value. If there is a difference, some kind of diff is
    printed, depending on the types of the values.

    Handles recursive structures composed of tuples, lists, sets, and/or
    dictionaries, but may choke on recursive structures stored in custom
    objects, in which case those objects will only be considered equal
    if they are the same object.
    """
    if memo is None:
        memo = set()

    pair_id = (id(val), id(ref))
    # Re-encountering a pair we're already in the middle of comparing
    # means we've hit a cycle: treat the sub-comparison as equal and let
    # the enclosing comparisons determine the overall result.
    if pair_id in memo:
        return {
            "status": "accomplished",
            "explanation": "Recursive comparison."
        }

    memo.add(pair_id)  # recursion may happen below this point

    def sub_equal(v, r):
        """Whether a recursive sub-comparison succeeds."""
        return strict_equality_checker(v, r, memo)["status"] == "accomplished"

    if type(val) != type(ref):
        return report_unequal(val, ref)

    if isinstance(val, (int, float, complex, type(None), bool, str)):
        # Atomic values: direct equality.
        same = val == ref
    elif isinstance(val, (tuple, list)):
        # Sequences: lengths must match and items must match pairwise.
        same = len(val) == len(ref) and all(
            sub_equal(v, r) for v, r in zip(val, ref)
        )
    elif isinstance(val, set):
        # Sets: compare as sorted lists so element order is irrelevant.
        same = sub_equal(sorted(val), sorted(ref))
    elif isinstance(val, dict):
        # Dictionaries: key sets must match, then each value must match.
        same = sub_equal(sorted(val.keys()), sorted(ref.keys())) and all(
            sub_equal(val[key], ref[key]) for key in val
        )
    else:
        # Unknown object type: use == if available, falling back to
        # identity on a recursion error (identity is also Python's
        # default behavior when __eq__ isn't present).
        if hasattr(val, "__eq__"):
            try:
                same = val == ref
            except RecursionError:
                same = val is ref
        else:
            same = val is ref

    if same:
        return report_equal(val, ref)
    else:
        return report_unequal(val, ref)
A checker to be used with OutputTest or ValueTest goals that ensures the output or value being checked is strictly equal to the reference output or value. If there is a difference, some kind of diff is printed, depending on the types of the values.
Handles recursive structures composed of tuples, lists, sets, and/or dictionaries, but may choke on recursive structures stored in custom objects, in which case those objects will only be considered equal if they are the same object.
def round_to_sig_figs(n, figs=5):
    """
    Rounds to the given number of significant figures (not decimal
    places). Computes the location of the first significant figure, and
    calls round with an appropriate (and possibly negative) precision
    value. Still subject to the limits of floating point numbers for
    very small values.
    """
    if n == 0:
        return 0
    # Exponent of the most-significant digit, e.g. 2 for 123.4 and -3
    # for 0.00123.
    exp = math.floor(math.log10(abs(n)))
    # The most-significant digit itself counts as one of the `figs`
    # figures, so keep figs - 1 digits after it. (The previous formula,
    # `figs - exp`, retained one significant figure too many.)
    round_to = figs - 1 - exp
    return round(n, round_to)
Rounds to the given number of significant figures (not decimal places). Computes the location of the first significant figure, and calls round with an appropriate (and possibly negative) precision value. Still subject to the limits of floating point numbers for very small values.
def build_float_equality_checker(sig_figs=5, use_decimal_places=False):
    """
    Builds and returns a checker function that tests whether the
    submitted value (a float) is equal to the reference value (also a
    float) up to the given number of significant figures, to avoid
    penalizing students for floating point rounding issues if they did
    math operations in a different order from the solution. Significant
    figures are treated as such (via round_to_sig_figs), not as decimal
    places, unless use_decimal_places is True, in which case sig_figs is
    instead the number of decimal places to round to.
    """
    def check_float_equality(val, ref):
        """
        Checks floating-point equality after rounding to a certain number
        of significant figures (or possibly decimal places).
        """
        # Round both values the same way before comparing.
        if use_decimal_places:
            rounded_val = round(val, sig_figs)
            rounded_ref = round(ref, sig_figs)
        else:
            rounded_val = round_to_sig_figs(val, sig_figs)
            rounded_ref = round_to_sig_figs(ref, sig_figs)

        if rounded_val != rounded_ref:
            return {
                "status": "failed",
                "explanation": (
                    "Value {} does not match expected value {} to the "
                    + "required precision. Compared as:<br>\n"
                    + "{} (value)<br>\n"
                    + "{} (expected)"
                ).format(val, ref, rounded_val, rounded_ref)
            }

        return {
            "status": "accomplished",
            "explanation": (
                "Value {} matches expected value to the required"
                + " precision."
            ).format(val)
        }

    return check_float_equality
Builds and returns a checker function that tests whether the submitted value (a float) is equal to the reference value (also a float) up to the given number of significant figures (to avoid penalizing students for floating point rounding issues if they did math operations in a different order from the solution. The significant figures are actually treated as such, not as decimal places, using round_to_sig_figs. This behavior is altered if use_decimal_places is True, in which case sig_figs is treated not as a number of significant figures but as a number of decimal places to round to.
The relative tolerance for floating-point similarity (see `cmath.isclose`).
The absolute tolerance for floating-point similarity (see `cmath.isclose`).
def numbers_are_equalish(val, ref):
    """
    Uses `cmath.isclose` to compare two floating-point numbers, and
    returns an evaluation result (a dictionary w/ status and explanation
    slots).

    The first number should be from the submitted code and the second
    should be the correct value.
    """
    # Module-level tolerance constants control what counts as "close."
    close_enough = cmath.isclose(
        val,
        ref,
        rel_tol=FLOAT_REL_TOLERANCE,
        abs_tol=FLOAT_ABS_TOLERANCE
    )

    if not close_enough:
        return {
            "status": "failed",
            "explanation": (
                f"Value {val} does not match expected value {ref} to the"
                f" required precision."
            )
        }

    return {
        "status": "accomplished",
        "explanation": (
            f"Value {val} matches expected value to the required"
            f" precision."
        )
    }
Uses `cmath.isclose` to compare two floating-point numbers, and returns an evaluation result (a dictionary with status and explanation slots).
The first number should be from the submitted code and the second should be the correct value.
def multiline_strings_are_exactly_equal(val, ref):
    """
    A checker to be used with harnesses which generate multi-line strings
    where the outputs can reasonably be expected to be exactly equal.
    Displays results more nicely than `omni_compare` because it knows
    they'll be strings.
    """
    if not (isinstance(val, str) and isinstance(ref, str)):
        raise TypeError(
            (
                "Can't check string equality for non-string value(s):\n"
                + "{}\nand\n{}"
            ).format(repr(val), repr(ref))
        )

    if val != ref:
        # Unequal: show a line-by-line diff of the two strings.
        table = html_tools.html_diff_table(
            val.splitlines(keepends=True),
            ref.splitlines(keepends=True),
            "Actual result",
            "Corresponding lines (if any) in expected result"
        )
        return {
            "status": "failed",
            "explanation": "Values are different.<br>\n{}".format(table)
        }

    return {
        "status": "accomplished",
        "explanation": "Values are the same.<br>\n{}".format(
            html_tools.html_output_details(val, "Value tested")
        )
    }
A checker to be used with harnesses which generate multi-line strings where the outputs can reasonably be expected to be exactly equal. Displays results more nicely than `omni_compare` because it knows they'll be strings.
def strings_are_equal_modulo_whitespace(val, ref, ignore_case=True):
    """
    A checker to be used with OutputTest or ValueTest goals that checks
    whether the value and the reference value are equivalent strings if
    whitespace is ignored completely. Fails if either value isn't a
    string. Ignores case by default.
    """
    if not (isinstance(val, str) and isinstance(ref, str)):
        raise TypeError(
            (
                "Can't check string equality for non-string value(s):\n"
                + "{}\nand\n{}"
            ).format(repr(val), repr(ref))
        )

    # Delete every space, tab, carriage return, and newline outright.
    strip_ws = {ord(ch): '' for ch in ' \t\n\r'}
    squashed_val = val.translate(strip_ws)
    squashed_ref = ref.translate(strip_ws)
    if ignore_case:
        squashed_val = squashed_val.casefold()
        squashed_ref = squashed_ref.casefold()

    if val == ref:
        # Exact match, whitespace and case included.
        return {
            "status": "accomplished",
            "explanation": "Strings are the same.<br>\n{}".format(
                html_tools.html_output_details(val, "Value tested")
            )
        }

    if squashed_val == squashed_ref:
        # Match once whitespace (and possibly case) is ignored.
        return {
            "status": "accomplished",
            "explanation": (
                "Strings are the same (ignoring whitespace).<br>\n{}"
            ).format(
                html_tools.html_output_details(
                    squashed_val,
                    "Simplified value"
                )
            )
        }

    # Genuinely different: show a line diff of the originals.
    table = html_tools.html_diff_table(
        val.splitlines(keepends=True),
        ref.splitlines(keepends=True),
        'Actual output',
        'Corresponding lines (if any) in expected output'
    )
    return {
        "status": "failed",
        "explanation": (
            "Actual value is different from the reference value:<br>\n{}"
        ).format(table)
    }
A checker to be used with OutputTest or ValueTest goals that checks whether the value and the reference value are equivalent strings if whitespace is ignored completely. Fails if either value isn't a string. Ignores case by default.
def without_trailing_whitespace(s):
    """
    Returns the given string with any trailing whitespace removed.
    Returns an empty string if given only whitespace.
    """
    # A single substitution replaces the previous manual loop that
    # counted trailing characters matching r'\s'; \s+ anchored at
    # end-of-string removes exactly the same characters the loop did.
    return re.sub(r'\s+\Z', '', s)
Returns the given string with any trailing whitespace removed. Returns an empty string if given only whitespace.
def without_trailing_whitespace_per_line(s):
    """
    Returns a version of the given multi-line string where trailing
    whitespace has been stripped from each line.
    """
    stripped = [
        without_trailing_whitespace(piece)
        for piece in s.split('\n')
    ]
    return '\n'.join(stripped)
Returns a version of the given multi-line string where trailing whitespace has been stripped from each line.
def compress_line(s):
    """
    Compresses internal whitespace and strips trailing whitespace from a
    single line of text.
    """
    # Strip trailing whitespace first, then collapse each remaining run
    # of whitespace into a single space.
    return re.sub(r'\s+', ' ', without_trailing_whitespace(s))
Compresses internal whitespace and strips trailing whitespace from a single line of text.
def compress_whitespace(s):
    """
    Returns a version of the given string where all runs of one or more
    whitespace characters on each line have been replaced with a single
    space character, and all trailing whitespace has been stripped from
    each line, plus any blank lines have been removed.
    """
    # Compress each line, then drop the lines that became empty.
    kept = [
        compressed
        for compressed in (compress_line(line) for line in s.split('\n'))
        if compressed != ''
    ]
    return '\n'.join(kept)
Returns a version of the given string where all runs of one or more whitespace characters on each line have been replaced with a single space character, and all trailing whitespace has been stripped from each line, plus any blank lines have been removed.
def strings_are_equal_modulo_most_whitespace(
    val,
    ref,
    track_internal_whitespace=False,
    ignore_case=True
):
    """
    A checker to be used with OutputTest or ValueTest goals that checks
    whether the value and the reference value are exactly equivalent
    strings after any trailing and extra internal whitespace is deleted.
    Fails if either value isn't a string.

    If track_internal_whitespace is True, the result will be only
    partially successful when there are differences in non-trailing
    whitespace or blank lines.

    By default, case is ignored.
    """
    if not (isinstance(val, str) and isinstance(ref, str)):
        raise TypeError(
            (
                "Can't check string equality for non-string value(s):\n"
                + "{}\nand\n{}"
            ).format(repr(val), repr(ref))
        )

    # Two successively more lenient normalizations: trailing whitespace
    # stripped (and possibly case folded), then internal whitespace
    # collapsed with blank lines dropped as well.
    stripped_val = without_trailing_whitespace_per_line(val)
    stripped_ref = without_trailing_whitespace_per_line(ref)
    if ignore_case:
        stripped_val = stripped_val.casefold()
        stripped_ref = stripped_ref.casefold()

    compressed_val = compress_whitespace(stripped_val)
    compressed_ref = compress_whitespace(stripped_ref)

    def diff_table():
        """Line-by-line diff of the original strings."""
        return html_tools.html_diff_table(
            val.splitlines(keepends=True),
            ref.splitlines(keepends=True),
            "Actual output",
            "Corresponding lines (if any) in expected output"
        )

    if val == ref:  # exact match
        return {
            "status": "accomplished",
            "explanation": "Strings are the same.<br>\n{}".format(
                html_tools.html_output_details(val, "Value tested")
            )
        }

    if stripped_val == stripped_ref:  # only trailing whitespace differs
        return {
            "status": "accomplished",
            "explanation": (
                "Strings are the same (ignoring trailing whitespace).<br>\n{}"
            ).format(
                html_tools.html_output_details(
                    stripped_val,
                    "Simplified value"
                )
            )
        }

    if compressed_val == compressed_ref:  # only other whitespace differs
        if track_internal_whitespace:
            return {
                "status": "partial",
                "explanation": (
                    "Strings aren't the same, but all differences are just"
                    + " in whitespace:<br>\n{}"
                ).format(diff_table())
            }
        return {
            "status": "accomplished",
            "explanation": (
                "Strings are the same (ignoring most whitespace).<br>\n{}"
            ).format(
                html_tools.html_output_details(
                    compressed_val,
                    "Simplified value"
                )
            )
        }

    # Genuinely different content.
    return {
        "status": "failed",
        "explanation": (
            "Actual value is different from the reference value:<br>\n{}"
        ).format(diff_table())
    }
A checker to be used with OutputTest or ValueTest goals that checks whether the value and the reference value are exactly equivalent strings after any trailing and extra internal whitespace is deleted. Fails if either value isn't a string.
if track_internal_whitespace is True, will be only partially successful if there are differences in non-trailing whitespace or blank lines.
By default, case is ignored.
def clean_word_list(line):
    """
    Takes a single line of output and produces a list of words, which are
    taken to be strictly contiguous sequences of characters in the \\w
    class, plus apostrophe.
    """
    # word_re is a module-level compiled pattern defined elsewhere in
    # this file; presumably something like r"[\w']+" — confirm there.
    return word_re.findall(line)
Takes a single line of output and produces a list of words, which are taken to be strictly contiguous sequences of characters in the \w class, plus apostrophe.
def rough_sequence_contains(
    val_lines,
    ref_lines,
    sequence_match_threshold,
    partial_sequence_threshold,
    line_match_threshold,
):
    """
    Returns the string "full" if, when diffing val_lines and ref_lines,
    the percentage of lines from ref_lines that are matched is at or
    above the sequence_match_threshold, and the string "partial" if that
    threshold isn't met but the partial_sequence_threshold is. Returns
    None if neither threshold is met.

    Adjacent delete/add pairs where the deleted line has a difflib
    sequence similarity ratio of at least the line_match_threshold are
    considered matching lines.

    NOTE(review): raises ZeroDivisionError if ref_lines is empty —
    callers appear to pass non-empty line lists; confirm.
    """
    differ = difflib.Differ()
    # Differ prefixes lines with '  ' (common), '- ' (only in
    # val_lines), '+ ' (only in ref_lines), or '? ' (annotation).
    comparison = [
        line
        for line in differ.compare(val_lines, ref_lines)
        if line[:2] != '? '  # filter out comment lines
    ]

    # chunk the diff to group sequences of additions and deletions
    chunks = []
    while comparison:
        if comparison[0][:2] == '  ':  # grab a chunk of matches
            i = 0
            while i < len(comparison) and comparison[i][:2] == '  ':
                i += 1
            matching_block = comparison[:i]
            comparison = comparison[i:]
            chunks.append(('match', matching_block))

        elif comparison[0][:2] == '- ':  # grab a chunk of deletions
            i = 0
            while i < len(comparison) and comparison[i][:2] == '- ':
                i += 1
            # If next thing is a + that matches the last -, don't include
            # the last - (the -/+ pair counts as one fuzzily-matched
            # line instead of a deletion plus an addition)
            if (
                i < len(comparison)
                and comparison[i][:2] == '+ '
                and difflib.SequenceMatcher(
                    None,
                    comparison[i - 1][2:],
                    comparison[i][2:]
                ).ratio() >= line_match_threshold  # noqa: E123
            ):
                missing_block = comparison[:i - 1]
                paired_block = comparison[i - 1:i + 1]
                comparison = comparison[i + 1:]
            else:
                missing_block = comparison[:i]
                paired_block = None
                comparison = comparison[i:]
            if missing_block:  # might be empty list
                chunks.append(('missing', missing_block))
            if paired_block:  # might be None
                chunks.append(('paired', paired_block))

        elif comparison[0][:2] == '+ ':  # grab a chunk of additions
            i = 0
            while i < len(comparison) and comparison[i][:2] == '+ ':
                i += 1
            added_block = comparison[:i]
            comparison = comparison[i:]
            chunks.append(('added', added_block))

    # Count matched + unmatched lines. 'missing' chunks (lines only in
    # val_lines) are deliberately ignored: extra submitted output
    # doesn't count against the match.
    matched = 0
    unmatched = 0
    for chtype, lines in chunks:
        if chtype == 'match':
            matched += len(lines)
        elif chtype == 'paired':
            matched += 1
        elif chtype == 'added':
            unmatched += len(lines)

    # Note we don't care about the number of deletions necessary

    # Who knows if matched + unmatched will add up? Who cares?
    # Take the more favorable of the two estimates of coverage.
    match_proportion = max(
        matched / len(ref_lines),
        (len(ref_lines) - unmatched) / len(ref_lines)
    )

    # Make our decision
    if (match_proportion >= sequence_match_threshold):
        return "full"
    elif (match_proportion >= partial_sequence_threshold):
        return "partial"
    else:
        return None
Returns the string "full" if, when diffing val_lines and ref_lines, the percentage of lines from ref_lines that are matched is at or above the sequence_match_threshold, and the string "partial" if that threshold isn't met but the partial_sequence_threshold is. Returns None if neither threshold is met.
Adjacent delete/add pairs where the deleted line has a difflib sequence similarity ratio of at least the line_match_threshold are considered matching lines.
def very_fuzzy_string_compare(
    val,
    ref,
    line_match_threshold=0.5,
    sequence_match_threshold=0.8,
    partial_sequence_threshold=None,
):
    """
    A checker to be used with OutputTest or ValueTest goals that checks
    whether the value and the reference value are kinda related. What it
    does is chops each line of both strings up into words, which consist
    of one or more alphanumeric characters, and ignores the rest of the
    line. These ordered tuples of words are made into lists, and the
    test succeeds if there is some ordered mapping from the
    word-tuple-list of the value to the word-tuple-list of the reference
    value that covers at least sequence_match_threshold percent of the
    items in the reference list, no matter how many items in the value
    list are unmatched (in other words, there may be a huge amount of
    extra output, but we don't care).

    The mapping may only map together lines where the difflib sequence
    similarity is at least the line_match_threshold (but this is after
    collapsing the tuple to a string with spaces in between, meaning
    that punctuation is ignored).

    The partial_sequence_threshold serves as the threshold for what
    percent of the reference sequence must be matched for the result to
    be a partial success. If not given, it will be set to 1/2 of the
    sequence_match_threshold.

    Case and blank lines are ignored.
    """
    if partial_sequence_threshold is None:
        partial_sequence_threshold = sequence_match_threshold / 2

    if not isinstance(val, str) or not isinstance(ref, str):
        raise TypeError(
            (
                "Can't check string equality for non-string value(s):\n"
                + "{}\nand\n{}"
            ).format(repr(val), repr(ref))
        )

    # Case-insensitive and whitespace-normalized versions used for two
    # cheap early-out equality checks before real fuzzy matching.
    val_folded = val.casefold()
    ref_folded = ref.casefold()

    val_compressed = compress_whitespace(val_folded)
    ref_compressed = compress_whitespace(ref_folded)

    if val_folded == ref_folded:  # full equality ignoring case
        return {
            "status": "accomplished",
            "explanation": "Strings are the same.<br>\n{}".format(
                html_tools.html_output_details(val, "Value tested")
            )
        }
    elif val_compressed == ref_compressed:  # equal when compressed
        return {
            "status": "accomplished",
            "explanation": (
                "Strings are the same (ignoring most whitespace).<br>\n{}"
            ).format(
                html_tools.html_output_details(
                    val_compressed,
                    "Simplified value"
                )
            )
        }
    else:  # we actually have to check matches
        diffTable = html_tools.html_diff_table(
            val.splitlines(keepends=True),
            ref.splitlines(keepends=True),
            "Actual output",
            "Corresponding lines (if any) in expected output"
        )

        def word_lines(compressed):
            """
            One space-joined word string per non-empty line. Each line's
            word list is computed exactly once (the previous version
            called clean_word_list twice per line: once in the filter
            and once in the result expression).
            """
            result = []
            for line in compressed.splitlines():
                words = clean_word_list(line)
                if words:
                    result.append(' '.join(words))
            return result

        # Check for a rough match between the sequences...
        match_status = rough_sequence_contains(
            word_lines(val_compressed),
            word_lines(ref_compressed),
            sequence_match_threshold,
            partial_sequence_threshold,
            line_match_threshold
        )

        if match_status == "full":
            return {
                "status": "accomplished",
                "explanation": (
                    "Strings are similar enough.<br>\n{}"
                ).format(diffTable)
            }
        elif match_status == "partial":
            return {
                "status": "partial",
                "explanation": (
                    "Strings are somewhat similar.<br>\n{}"
                ).format(diffTable)
            }
        else:
            return {
                "status": "failed",
                "explanation": (
                    "Actual value is different from the reference"
                    + " value:<br>\n{}"
                ).format(diffTable)
            }
A checker to be used with OutputTest or ValueTest goals that checks whether the value and the reference value are kinda related. What it does is chops each line of both strings up into words, which consist of one or more alphanumeric characters, and ignores the rest of the line. These ordered tuples of words are made into lists, and the test succeeds if there is some ordered mapping from the word-tuple-list of the value to the word-tuple-list of the reference value that covers at least sequence_match_threshold percent of the items in the reference list, no matter how many items in the value list are unmatched (in other words, there may be a huge amount of extra output, but we don't care).
The mapping may only map together lines where the difflib sequence similarity is at least the line_match_threshold (but this is after collapsing the tuple to a string with spaces in between, meaning that punctuation is ignored).
The partial_sequence_threshold serves as the threshold for what percent of the reference sequence must be matched for the result to be a partial success. If not given, it will be set to 1/2 of the sequence_match_threshold.
Case and blank lines are ignored.
def build_contains_re_checker(name, expression, required_matches=1):
    """
    Returns a checker function that tests whether the submitted value
    contains at least the required number of matches for the given
    regular expression (a string, pre-compiled expression, or function
    which will produce an re.Pattern when given a reference string). The
    name is used to describe the pattern in the explanation returned.
    """
    # Normalize the three accepted forms of `expression` into a function
    # from a reference string to a compiled pattern. For the string form
    # we compile once up front instead of recompiling on every check
    # (also replaces the previous lambda assignments, per E731).
    if isinstance(expression, str):
        compiled = re.compile(expression)

        def build_regex(ref):
            """Ignores the reference value; returns the fixed pattern."""
            return compiled
    elif isinstance(expression, re.Pattern):
        def build_regex(ref):
            """Ignores the reference value; returns the fixed pattern."""
            return expression
    else:
        build_regex = expression

    def string_contains_re(val, ref):
        """
        This will be replaced.
        """
        regex = build_regex(ref)
        if not isinstance(val, str):
            return {
                "status": "failed",
                "explanation": (
                    "Value is not a string! (was looking for: '{}')"
                ).format(name)
            }
        matches = regex.findall(val)
        if len(matches) >= required_matches:
            if required_matches == 1:
                return {
                    "status": "accomplished",
                    "explanation": "Found {}.".format(name)
                }
            else:
                return {
                    "status": "accomplished",
                    "explanation": (
                        "Found {} matches (of {} required) for {}."
                    ).format(len(matches), required_matches, name)
                }
        else:
            if required_matches == 1:
                return {
                    "status": "failed",
                    "explanation": ("Did not find {}.").format(name)
                }
            else:
                return {
                    "status": "failed",
                    "explanation": (
                        "Found {} matches (of {} required) for {}."
                    ).format(len(matches), required_matches, name)
                }

    # Replace the placeholder docstring with one describing this
    # particular checker's pattern and requirements.
    string_contains_re.__doc__ = """\
Returns "accomplished" if the value contains at least {}
matche(s) for the regular expression:

    {}

In the output, it is referred to as:

    {}
""".format(
        required_matches,
        repr(expression),
        name
    )
    return string_contains_re
Returns a checker function that tests whether the submitted value contains at least the required number of matches for the given regular expression (a string, pre-compiled expression, or function which will produce an re.Pattern when given a reference string). The name is used to describe the pattern in the explanation returned.
def build_approx_matcher(threshold=0.8, partial_threshold=0.6):
    """
    Returns a checker function that uses a difflib.SequenceMatcher to
    compute a difference ratio for the given values (should be strings)
    and compares it to the given threshold.
    """
    def approx_check(val, ref):
        """
        Uses a difflib.SequenceMatcher to compare two strings, and
        succeeds if their similarity is at least a certain threshold.
        """
        if not isinstance(val, str) or not isinstance(ref, str):
            raise TypeError("Value and reference value must both be strings.")

        ratio = difflib.SequenceMatcher(None, val, ref).ratio()

        if ratio >= threshold:
            status = "accomplished"
            status_desc = "similar enough"
            # Test exact identity before near-identity: the original
            # checked `ratio >= 0.9` first, which made the
            # "exactly the same" branch unreachable.
            if ratio == 1:
                status_desc = "exactly the same"
            elif ratio >= 0.9:
                status_desc = "almost identical"
        elif ratio >= partial_threshold:
            status = "partial"
            status_desc = "somewhat similar"
        else:
            status = "failed"
            status_desc = "not the same"

        # TODO: NOT THIS HERE!
        explanation = (
            "Values were {}.<br>\n{}"
        ).format(
            status_desc,
            html_tools.html_diff_table(
                val,
                ref,
                joint_title="Values compared:"
            )
        )

        return {
            "status": status,
            "explanation": explanation
        }

    return approx_check
Returns a checker function that uses a difflib.SequenceMatcher to compute a difference ratio for the given values (should be strings) and compares it to the given threshold.
def result_distributions_have_same_keys(obs_dist, ref_dist):
    """
    Compares two result distribution dictionaries to ensure that they
    include the same set of keys, regardless of how common those keys
    are. Each dictionary should contain "trials" and "results" keys,
    where the "results" key maps different results to integer
    frequencies.
    """
    observed = obs_dist["results"]
    expected = ref_dist["results"]

    # Keys present on one side but not the other.
    only_observed = set(observed) - set(expected)
    only_expected = set(expected) - set(observed)

    if not (only_observed or only_expected):
        return {
            "status": "accomplished",
            "explanation": (
                "Observed distribution has the same set of possibilities"
                + " as the reference distribution."
            )
        }

    # Build one <li> per mismatched key, observed-only entries first.
    items = [
        (
            "<li>Observed result:\n<pre>{}</pre>"
            + "\nnever occurred in reference"
            + " distribution.</li>"
        ).format(ext)
        for ext in only_observed
    ]
    items.extend(
        (
            "<li>Expected result:\n<pre>{}</pre>"
            + "\nnever occurred in observed"
            + " distribution.</li>"
        ).format(mis)
        for mis in only_expected
    )

    return {
        "status": "failed",
        "explanation": html_tools.build_html_details(
            "Possibilities are different",
            "<ul>\n{}\n</ul>".format('\n'.join(items))
        )
    }
Compares two result distribution dictionaries to ensure that they include the same set of keys, regardless of how common those keys are. Each dictionary should contain "trials" and "results" keys, where the "results" key maps different results to integer frequencies.
def build_distribution_comparator(precision=2):
    """
    Returns a comparator function for result distributions, where
    fractions of the result in each distribution category are expected to
    agree after rounding to the given number of decimal places.
    """
    def result_distributions_are_similar(obs_dist, ref_dist):
        """
        Compares two result distribution dictionaries for statistical
        similarity. Each dictionary should contain "trials" and "results"
        keys, where the "results" key maps different results to integer
        frequencies.

        # TODO: Better statistical tests here...
        """

        obs_res = obs_dist["results"]
        ref_res = ref_dist["results"]

        # Keys appearing on only one side mean an immediate failure.
        # NOTE(review): this duplicates the key check in
        # result_distributions_have_same_keys almost verbatim; consider
        # sharing a helper.
        extra = set(obs_res) - set(ref_res)
        missing = set(ref_res) - set(obs_res)

        if extra or missing:
            return {
                "status": "failed",
                "explanation": html_tools.build_html_details(
                    "Distributions are different",
                    "<ul>\n{}\n</ul>".format(
                        '\n'.join(
                            [
                                (
                                    "<li>Observed result:\n<pre>{}</pre>"
                                    + "\nnever occurred in reference"
                                    + " distribution.</li>"
                                ).format(ext)
                                for ext in extra
                            ] + [
                                (
                                    "<li>Expected result:\n<pre>{}</pre>"
                                    + "\nnever occurred in observed"
                                    + " distribution.</li>"
                                ).format(mis)
                                for mis in missing
                            ]
                        )
                    )
                )
            }

        # Same key sets: compare the per-key fractions of total trials,
        # each rounded to `precision` decimal places.
        disagree = []
        for key in sorted(obs_res):
            obs_fr = round(obs_res[key] / obs_dist["trials"], precision)
            ref_fr = round(ref_res[key] / ref_dist["trials"], precision)
            if obs_fr != ref_fr:
                disagree.append((key, obs_fr, ref_fr))

        if disagree:
            # NOTE(review): 100 * rounded fraction can show float
            # artifacts (e.g. 15.000000000000002) in the report.
            return {
                "status": "failed",
                "explanation": html_tools.build_html_details(
                    "Distributions were noticeably different",
                    "<ul>\n{}\n</ul>".format(
                        '\n'.join(
                            (
                                "<li>Result\n<pre>{}</pre>\n occurred"
                                + " about {}% of the time in the reference"
                                + " distribution but was observed about"
                                + " {}% of the time.</li>"
                            ).format(
                                key,
                                100 * ref_fr,
                                100 * obs_fr
                            )
                            for (key, obs_fr, ref_fr) in disagree
                        )
                    )
                )
            }
        else:
            return {
                "status": "accomplished",
                "explanation": (
                    "Distributions agreed to the required precision:<br>\n"
                    + str(ref_res) + " (ref)<br>\n"
                    + str(obs_res) + " (obs)"
                )
            }

    return result_distributions_are_similar
Returns a comparator function for result distributions, where fractions of the result in each distribution category are expected to agree after rounding to the given number of decimal places.
1069def reset_stubs(): 1070 """ 1071 Resets the stub counter. Useful to ensure that equivalent data 1072 structures are equivalent when made hashable via equivalent stubs, 1073 although order really matters (i.e., recursive dictionaries with 1074 different key orders will not necessarily end up with the same 1075 stubs). 1076 """ 1077 global _STUB_N 1078 _STUB_N = 0
Resets the stub counter. Useful to ensure that equivalent data structures are equivalent when made hashable via equivalent stubs, although order really matters (i.e., recursive dictionaries with different key orders will not necessarily end up with the same stubs).
1081def next_stub(): 1082 """ 1083 Generates a unique string to indicate the presence of a recursive 1084 structure encountered during make_hashable. 1085 """ 1086 global _STUB_N 1087 n = _STUB_N 1088 _STUB_N += 1 1089 result = "" 1090 while n // _STUB_LEN > 0: 1091 result = _STUB_CHARS[n % _STUB_LEN] + result 1092 n //= _STUB_LEN 1093 result = _STUB_CHARS[n % _STUB_LEN] + result 1094 return "R&" + result
Generates a unique string to indicate the presence of a recursive structure encountered during make_hashable.
1097def make_hashable(dataStructure, memo=None): 1098 """ 1099 Takes a data structure that may contain lists, tuples, dictionaries, 1100 and/or sets as well as strings, numbers, booleans, and Nones, and 1101 turns it into a similar structure using tuples in place of lists, 1102 sets, and dictionaries that can be hashed. Uses a ID-based memo to 1103 avoid re-processing recursive structures, with string stubs in place 1104 of already-visited objects. 1105 """ 1106 if memo is None: 1107 reset_stubs() 1108 memo = {} 1109 1110 # If this is a re-visit to a container object, return a stub string 1111 if ( 1112 not isinstance(dataStructure, (int, float, str, bool)) 1113 and id(dataStructure) in memo 1114 ): 1115 return memo[id(dataStructure)] 1116 1117 # Save a stub string in case of a future re-visit 1118 memo[id(dataStructure)] = next_stub() 1119 1120 if isinstance(dataStructure, (list, tuple)): 1121 return tuple([make_hashable(x, memo) for x in dataStructure]) 1122 elif isinstance(dataStructure, set): 1123 # We sort sets so that ordering is irrelevant 1124 return tuple([make_hashable(x, memo) for x in sorted(dataStructure)]) 1125 elif isinstance(dataStructure, dict): 1126 # We sort keys so that ordering is irrelevant 1127 return tuple( 1128 (key, make_hashable(dataStructure[key], memo)) 1129 for key in sorted(dataStructure) 1130 ) 1131 else: 1132 try: 1133 _ = hash(dataStructure) 1134 except TypeError as e: 1135 if "unhashable" in str(e): 1136 raise TypeError( 1137 ( 1138 "Could not convert object of type {} to hashable" 1139 + " equivalent:\n{}" 1140 ).format(type(dataStructure), repr(dataStructure)) 1141 ) 1142 else: 1143 raise 1144 1145 # If we get here it's fully hashable already 1146 return dataStructure
Takes a data structure that may contain lists, tuples, dictionaries, and/or sets as well as strings, numbers, booleans, and Nones, and turns it into a similar structure using tuples in place of lists, sets, and dictionaries that can be hashed. Uses an ID-based memo to avoid re-processing recursive structures, with string stubs in place of already-visited objects.
def longest_common_subsequence(
    seq1,
    seq2,
    start1=None, end1=None,
    start2=None, end2=None,
    matcher=lambda x, y: x == y,
    memo=None
):
    """
    Finds the longest common subsequence between seq1[start1:end1] and
    seq2[start2:end2] (missing bounds default to the full sequences).
    The memo argument is internal and should never be supplied by
    callers; it caches sub-problem solutions for speed.

    The matcher decides what counts as a match: if it returns a dict
    with a 'status' key, only a status of exactly "accomplished"
    counts; otherwise any truthy return value counts.

    Returns a tuple of (seq1-index, seq2-index) pairs for the matched
    items. When several subsequences tie for longest, the one using the
    earliest items from seq1 wins.
    """
    # Fill in default bounds
    if start1 is None:
        start1 = 0
    if end1 is None:
        end1 = len(seq1)
    if start2 is None:
        start2 = 0
    if end2 is None:
        end2 = len(seq2)

    # Fresh memo on the outermost call
    if memo is None:
        memo = {}

    key = (start1, end1, start2, end2)
    if key in memo:
        return memo[key]

    if start1 == end1 or start2 == end2:
        # An empty range can't match anything
        answer = ()
    else:
        verdict = matcher(seq1[start1], seq2[start2])
        # TODO: Some kind of handling for partial matches here?!?
        if isinstance(verdict, dict) and 'status' in verdict:
            # Checker-style matcher: only full success is a match
            verdict = verdict['status'] == 'accomplished'

        if verdict:
            # Matching first items always belong to an optimal LCS, so
            # take them and recurse past both.
            rest = longest_common_subsequence(
                seq1, seq2,
                start1 + 1, end1,
                start2 + 1, end2,
                matcher, memo
            )
            answer = ((start1, start2),) + rest
        else:
            # Try skipping the first item of each sequence in turn;
            # one of them cannot be part of the overall match.
            skip1 = longest_common_subsequence(
                seq1, seq2,
                start1 + 1, end1,
                start2, end2,
                matcher, memo
            )
            skip2 = longest_common_subsequence(
                seq1, seq2,
                start1, end1,
                start2 + 1, end2,
                matcher, memo
            )
            # >= biases ties toward earlier items from sequence 1
            answer = skip1 if len(skip1) >= len(skip2) else skip2

    memo[key] = answer
    return answer
Finds the longest common subsequence between the continuous subsequences of the two given sequences which start and end at the given start/end points for each sequence. Uses the memo (which should never be provided explicitly) to remember solutions to sub-problems and greatly speed up the results.
The custom matcher function determines what counts as a match. If it returns a dictionary with a 'status' key, the two items are counted as matching only if that status value is exactly "accomplished". Otherwise, it should return a boolean, and any truthy return value will be counted as a match.
The return value is actually a tuple of pairs of integer indices, where each pair contains one index from sequence 1 and the index of a matching item from sequence 2.
If there are multiple different longest common subsequences, the one including the earliest items from sequence 1 is returned.
1254def diff_blocks(seq1, seq2, matcher=lambda x, y: x == y): 1255 """ 1256 Returns a list of 3-element lists in the same format as 1257 difflib.SequenceMatcher's get_matching_blocks output: A list of 1258 [seq1-index, seq1-index, length] triples where each triple indicates 1259 that sequences 1 and 2 match starting at seq1-index in sequence 1 and 1260 seq2-index in sequence 2 and continuing for length items. 1261 1262 The difference is that this method accepts an optional custom matcher 1263 function which is used to establish whether two items are equal or 1264 not. That matcher function may return a truthy value to indicate a 1265 match, or it could be a checker function which returns a dictionary 1266 containing a 'status' key, where the status being 'accomplished' 1267 indicates a match. 1268 """ 1269 # Get longest common subsequence 1270 lcs = longest_common_subsequence(seq1, seq2, matcher=matcher) 1271 if len(lcs) == 0: 1272 return [] # no matches whatsoever... 1273 1274 # Convert sequence of matched indices to a list of matching blocks 1275 1276 # The first block is a length-1 block anchored at the first matching 1277 # indices 1278 blocks = [ [lcs[0][0], lcs[0][1], 1] ] 1279 1280 # We go through all point matches and extend each block or create a 1281 # new block 1282 for i1, i2 in lcs[1:]: 1283 if ( 1284 i1 == blocks[-1][0] + blocks[-1][2] 1285 and i2 == blocks[-1][1] + blocks[-1][2] 1286 ): 1287 blocks[-1][2] += 1 # block continues 1288 else: 1289 blocks.append([i1, i2, 1]) # new block starts 1290 1291 return blocks
Returns a list of 3-element lists in the same format as difflib.SequenceMatcher's get_matching_blocks output: A list of [seq1-index, seq2-index, length] triples where each triple indicates that sequences 1 and 2 match starting at seq1-index in sequence 1 and seq2-index in sequence 2 and continuing for length items.
The difference is that this method accepts an optional custom matcher function which is used to establish whether two items are equal or not. That matcher function may return a truthy value to indicate a match, or it could be a checker function which returns a dictionary containing a 'status' key, where the status being 'accomplished' indicates a match.
def correspondence_helper(
    pool1,
    pool2,
    indexset1,
    indexset2,
    lower_bound=0,
    matcher=lambda x, y: x == y,
    memo=None
):
    """
    Helper for best_correspondence that also takes frozen sets of live
    indices for each pool so that memoization can be applied.

    The lower_bound argument allows it to abort early if it figures out
    that the upper bound for its match is smaller than that value; an
    aborted call returns None (and is deliberately not memoized, since
    the outcome depends on lower_bound, not just the index sets).

    Returns a pool1 -> pool2 index mapping.

    TODO: This function seems dangerously exponential in both time and
    memory... Can we do better? Should we do something else?
    """
    if memo is None:
        memo = {}

    # The most matches we could still make is the smaller live set; if
    # that can't reach lower_bound, give up early.
    if min(len(indexset1), len(indexset2)) < lower_bound:
        return None

    # Look up a result if we've got one
    memo_key = (indexset1, indexset2)
    if memo_key in memo:
        return memo[memo_key]

    # If either pool is exhausted, the result is an empty mapping
    if len(indexset1) == 0 or len(indexset2) == 0:
        result = {}
        memo[memo_key] = result
        return result

    # Find all possible matches of the first available key in the first
    # pool (matcher return value is used for truthiness only here):
    first_p1_index = next(iter(indexset1))
    first_p1_item = pool1[first_p1_index]
    all_matches = [
        p2_index
        for p2_index in indexset2
        if matcher(first_p1_item, pool2[p2_index])
    ]

    # Recurse for each possible match of the first item:
    best_so_far = None
    for p2_index in all_matches:
        sub_result = correspondence_helper(
            pool1,
            pool2,
            indexset1 - {first_p1_index},
            indexset2 - {p2_index},
            # -1 because of the extra match we'll add to this result
            lower_bound=max(0, lower_bound - 1),
            matcher=matcher,
            memo=memo
        )

        # BUG FIX: check for an aborted (None) sub-result BEFORE
        # merging it; the old order called .update(None), which would
        # raise TypeError.
        if sub_result is None:
            continue

        full_result = {first_p1_index: p2_index}
        full_result.update(sub_result)

        if best_so_far is None or len(full_result) > len(best_so_far):
            best_so_far = full_result
            # Raise our lower bound if this result lets us
            lower_bound = max(lower_bound, len(best_so_far))

    # One more possibility: leaving first_p1_index unmatched entirely
    # (or it simply can't match) may do better.
    unmatch_result = correspondence_helper(
        pool1,
        pool2,
        indexset1 - {first_p1_index},
        indexset2,
        lower_bound=lower_bound,  # unchanged: no match was added
        matcher=matcher,
        memo=memo
    )

    if (
        unmatch_result is not None
        and (best_so_far is None or len(unmatch_result) > len(best_so_far))
    ):
        best_so_far = unmatch_result
        # No need to update lower bound; we're done searching

    # If everything aborted or failed, the answer is an empty mapping
    if best_so_far is None:
        best_so_far = {}

    memo[memo_key] = best_so_far
    return best_so_far
Helper for best_correspondence that also takes frozen sets of live indices for each pool so that memoization can be applied.
The lower_bound argument allows it to abort early if it figures out that the upper bound for its match is smaller than that value.
Returns a pool1 -> pool2 mapping.
TODO: This function seems dangerously exponential in both time and memory... Can we do better? Should we do something else?
1400def best_correspondence(pool1, pool2, matcher=lambda x, y: x == y): 1401 """ 1402 Finds the best pairing of elements from the two given pools (which 1403 aren't necessarily the same size) such that each pair satisfies the 1404 given matcher function (default is the items must be equal) and the 1405 total number of pairs is maximized. The order of items in each pool 1406 is irrelevant, but the pools must be indexable, because this function 1407 returns a dictionary that maps indices in pool1 to indices in pool2. 1408 """ 1409 return correspondence_helper( 1410 pool1, 1411 pool2, 1412 frozenset(range(len(pool1))), 1413 frozenset(range(len(pool2))), 1414 matcher=matcher 1415 )
Finds the best pairing of elements from the two given pools (which aren't necessarily the same size) such that each pair satisfies the given matcher function (default is the items must be equal) and the total number of pairs is maximized. The order of items in each pool is irrelevant, but the pools must be indexable, because this function returns a dictionary that maps indices in pool1 to indices in pool2.
def sequence_diff(
    val,
    ref,
    order_matters=False,
    item_matcher=lambda x, y: x == y
):
    """
    Returns a tuple of three lists: a list of matching items, a list of
    missing items that are present in the reference sequence but not
    the value sequence, and a list of extra items that are present in
    the value sequence but not the reference sequence. If order_matters
    is given as True, some items may be listed as both "missing" and
    "extra" if they appear out-of-order in the value sequence.

    The item_matcher decides what counts as a match: any truthy return
    value matches, but a checker-style dictionary result (with a
    'status' key) matches only when the status is 'accomplished'.

    TODO: Actually respect "partial" as an item_matcher result!
    """
    # TODO: NOT THIS HACK!
    # When order doesn't matter but the items are sortable, sorting
    # both sides reduces the problem to the much cheaper ordered diff.
    if order_matters is False:
        try:
            val = sorted(val)
            ref = sorted(ref)
            order_matters = True
        except Exception:
            pass

    # Materialize each input exactly once: they may be sets/views, and
    # we index into them repeatedly below. (Previously list(val) was
    # rebuilt per element, making the unordered branch O(n^2).)
    val_list = list(val)
    ref_list = list(ref)

    vhashables = [make_hashable(x) for x in val_list]
    rhashables = [make_hashable(x) for x in ref_list]

    if order_matters:
        matching, missing, extra = [], [], []

        blocks = diff_blocks(vhashables, rhashables, matcher=item_matcher)

        # Convert blocks into matching/missing/extra form: items inside
        # a block match; items between blocks are missing (ref side) or
        # extra (val side).
        last_vidx = 0
        last_ridx = 0
        for vidx, ridx, blen in blocks:
            matching.extend(val_list[vidx:vidx + blen])
            missing.extend(ref_list[last_ridx:ridx])
            extra.extend(val_list[last_vidx:vidx])

            last_vidx = vidx + blen
            last_ridx = ridx + blen

        # Items after the final block (or everything, if there were no
        # blocks at all) are missing/extra too.
        extra.extend(val_list[last_vidx:])
        missing.extend(ref_list[last_ridx:])

    else:
        # Find the best unordered correspondence between the hashable
        # versions of the value and reference items.

        # TODO: Safety valve for values that are much longer than
        # expected?
        correspondence = best_correspondence(
            vhashables,
            rhashables,
            matcher=item_matcher
        )
        matched_indices = set(correspondence.values())

        # Preserves input ordering within each result list
        matching = [
            val_list[idx]
            for idx in correspondence
        ]
        missing = [
            ref_list[idx]
            for idx in range(len(ref_list))
            if idx not in matched_indices
        ]
        extra = [
            val_list[idx]
            for idx in range(len(val_list))
            if idx not in correspondence
        ]

    return matching, missing, extra
Returns a tuple of three lists: a list of matching items, a list of missing items that are present in the reference sequence but not the value sequence, and a list of extra items that are present in the value sequence but not the reference sequence. If order_matters is given as True, some items may be listed as both "missing" and "extra" if they appear out-of-order in the value sequence.
The item_matcher is used to determine what counts as a match, if it returns a truthy value, two items will be matched. However, it may also be a checker-style function that returns a dictionary with a 'status' key, in which case a status value of 'accomplished' will count as a match and any other value will not.
TODO: Actually respect "partial" as an item_matcher result!
def make_structure_comparator(
    tolerance="auto",
    order_matters=False,
    item_matcher=lambda x, y: x == y,
    item_repr=html_tools.dynamic_html_repr,
    key_repr=None,
    include_full_results=True
):
    """
    Creates a comparator that looks at two data structures (composed of
    lists, tuples, dictionaries, and/or sets) and reports on their
    differences as a status/explanation dictionary.

    tolerance: how many top-level differences to forgive with a
    "partial" (rather than "failed") status. The default string 'auto'
    scales the allowance with the reference size; 0 means only full
    success or failure; any other number is used directly; anything
    else must be a function that receives the reference value and
    returns the number of tolerated differences.

    order_matters: set to True if sequence order is significant (only
    apply this to sets and dictionaries if you expect them to be
    ordered).

    item_matcher: applied to pairs of items to decide equality. Any
    truthy value counts as a match, unless the result is a dictionary
    with a 'status' key, in which case only 'accomplished' counts
    (allowing checker-style functions). For dictionaries it is applied
    to (key, value) tuples — two tuple arguments, not four — and during
    the key-comparison phase the value slots are None.

    item_repr / key_repr: functions used to render items (and dict
    keys) in the missing/extra/moved lists of explanations. key_repr
    defaults to item_repr, which itself defaults to
    html_tools.dynamic_html_repr.

    include_full_results: pass False to omit the full side-by-side
    diff/result listings from explanations.

    TODO: Would be nice to support partial matching at the item level,
    but that's hard!
    """
    # Normalize tolerance into a function of the reference value
    if tolerance == "auto":
        tolerance_fcn = lambda ref: min(len(ref) // 5, 3 + len(ref) // 50)
    elif isinstance(tolerance, (int, float)):
        tolerance_fcn = lambda ref: tolerance
    else:
        tolerance_fcn = tolerance

    if key_repr is None:
        key_repr = item_repr

    def compare_structures(val, ref):
        """
        Comparator for structured data (lists, tuples, sets, and/or
        dictionaries) according to the arguments given to
        make_structure_comparator.
        """
        if not isinstance(ref, (list, tuple, set, dict)):
            raise TypeError(
                "Reference value for compare_structures must"
                + " be a list, tuple, set, or dictionary."
            )
        elif type(val) != type(ref):
            # Mismatched container types fail immediately
            return {
                "status": "failed",
                "explanation": (
                    "Your result was a {} but should have been a {}."
                ).format(type(val).__name__, type(ref).__name__)
            }

        # Define our tolerance value (shadows the factory parameter on
        # purpose: this is the resolved per-reference number)
        tolerance = tolerance_fcn(ref)

        if isinstance(val, dict):
            # Phase 1: compare key sets only (values stubbed to None)
            kmatch, kmiss, kext = sequence_diff(
                val.keys(),
                ref.keys(),
                # order doesn't matter for dict keys...
                # TODO: What if it's a subclass though...?
                item_matcher=lambda k1, k2: (
                    item_matcher((k1, None), (k2, None))
                )
            )
            wrong = len(kmiss) + len(kext)
            if wrong > 0:
                # Key differences beyond tolerance fail; within
                # tolerance they're partial
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the wrong keys (out of {},"
                        + " {} matched).<br>\n"
                    ).format(
                        phrasing.obj_num(len(ref), "expected key"),
                        len(kmatch)
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result has some incorrect keys (out of {},"
                        + " {} matched).<br>\n"
                    ).format(
                        phrasing.obj_num(len(ref), "expected key"),
                        len(kmatch)
                    )
                if kmiss:
                    expl += html_tools.build_html_details(
                        "Missing keys:",
                        html_tools.build_list(key_repr(x) for x in kmiss)
                    )
                if kext:
                    expl += html_tools.build_html_details(
                        "Extra keys:",
                        html_tools.build_list(key_repr(x) for x in kext)
                    )

                if include_full_results:
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )
                return { "status": status, "explanation": expl }

            # otherwise, we can check values

            # Phase 2: keys agree, so compare full (key, value) items
            imatch, imiss, iext = sequence_diff(
                list(val.items()),
                list(ref.items()),
                order_matters=order_matters,
                item_matcher=item_matcher
            )

            if order_matters:
                # Find items that moved: anything both missing and
                # extra is really just out of order.
                # NOTE(review): this branch uses plain equality
                # (`in`/`index`), not item_matcher, unlike the non-dict
                # branch below — presumably intentional, but confirm.
                imoved = []
                imoved_missing = []
                imoved_extra = []
                for midx, miss in enumerate(imiss):
                    if miss in iext:
                        eidx = iext.index(miss)
                        imoved_missing.append(midx)
                        imoved_extra.append(eidx)
                        imoved.append(miss)

                imiss = [
                    imiss[idx]
                    for idx in range(len(imiss))
                    if idx not in imoved_missing
                ]

                iext = [
                    iext[idx]
                    for idx in range(len(iext))
                    if idx not in imoved_extra
                ]

            else: # order doesn't matter
                imoved = []

            wrong = len(imiss) + len(iext) + len(imoved)
            if wrong > 0:
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the right keys but some values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        len(imatch)
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result has the right keys but a few values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        len(imatch)
                    )
                if imiss:
                    expl += html_tools.build_html_details(
                        "Missing values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in imiss
                        )
                    )
                if iext:
                    expl += html_tools.build_html_details(
                        "Extra values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in iext
                        )
                    )
                if imoved:
                    expl += html_tools.build_html_details(
                        "Out-of-order values (by key):",
                        html_tools.build_list(
                            key_repr(k) + ": " + item_repr(v)
                            for (k, v) in imoved
                        )
                    )
                if include_full_results:
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )

                return { "status": status, "explanation": expl }
            else:
                # Full success for dictionaries
                if include_full_results:
                    results = "<br>\n" + html_tools.build_html_details(
                        "Your result:",
                        html_tools.dynamic_html_repr(val)
                    )
                else:
                    results = ""

                return {
                    "status": "accomplished",
                    "explanation": (
                        "Your result has the correct keys and values ({}"
                        + " matched).{}"
                    ).format(
                        phrasing.obj_num(len(imatch), "item"),
                        results
                    )
                }

        else: # anything other than a dictionary
            matching, missing, extra = sequence_diff(
                val,
                ref,
                order_matters=order_matters,
                item_matcher=item_matcher
            )

            if order_matters:
                # Find items that moved (missing + extra = out of
                # order); here the item_matcher IS used for the pairing
                moved = []
                moved_missing = []
                moved_extra = []
                for midx, miss in enumerate(missing):
                    for eidx, xtra in enumerate(extra):
                        if item_matcher(miss, xtra):
                            # TODO: What if multiple copies are out-of-order?
                            moved_missing.append(midx)
                            moved_extra.append(eidx)
                            moved.append(miss)
                            break

                new_missing = [
                    missing[idx]
                    for idx in range(len(missing))
                    if idx not in moved_missing
                ]
                missing = new_missing

                new_extra = [
                    extra[idx]
                    for idx in range(len(extra))
                    if idx not in moved_extra
                ]
                extra = new_extra

            else: # order doesn't matter
                moved = []

            wrong = len(missing) + len(extra) + len(moved)
            if wrong > 0:
                if wrong > tolerance:
                    status = "failed"
                    expl = (
                        "Your result has the wrong values (out of {},"
                        + " {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        phrasing.obj_num(len(matching), "value")
                    )
                else:
                    status = "partial"
                    expl = (
                        "Your result is mostly correct but some values"
                        + " are wrong (out of {}, {} matched)."
                    ).format(
                        phrasing.obj_num(len(ref), "expected value"),
                        phrasing.obj_num(len(matching), "value")
                    )
                # TODO: better management of small differences in large
                # structures here!
                if missing:
                    expl += html_tools.build_html_details(
                        "Missing values:",
                        html_tools.build_list(
                            item_repr(x) for x in missing
                        )
                    )
                if extra:
                    expl += html_tools.build_html_details(
                        "Extra values:",
                        html_tools.build_list(
                            item_repr(x) for x in extra
                        )
                    )
                if moved:
                    expl += html_tools.build_html_details(
                        "Out-of-order values:",
                        html_tools.build_list(
                            item_repr(x) for x in moved
                        )
                    )
                if include_full_results:
                    # TODO: We'd like scrollable full-content diffs here,
                    # but... that's a Project.
                    expl += html_tools.html_diff_table(
                        html_tools.truncate(
                            html_tools.big_repr(val),
                            limit=5000
                        ),
                        html_tools.truncate(
                            html_tools.big_repr(ref),
                            limit=5000
                        ),
                        "Full result",
                        "Expected result",
                        "Detailed differences:"
                    )
                return { "status": status, "explanation": expl }
            else:
                # Full success for sequences/sets
                if include_full_results:
                    results = "<br>\n" + html_tools.build_html_details(
                        "Your result:",
                        html_tools.dynamic_html_repr(val)
                    )
                else:
                    results = ""
                return {
                    "status": "accomplished",
                    "explanation": (
                        "Your result has the correct values ({}"
                        + " matched).{}"
                    ).format(
                        phrasing.obj_num(len(matching), "value"),
                        results
                    )
                }

    return compare_structures
Creates a comparator that looks at two data structures (composed of lists, tuples, dictionaries, and/or sets) and reports on their differences. If the tolerance is left at the default (the string 'auto'), a partial success will be returned if only a few items are different at the top level of the given data structure. It may be given as 0, in which case only full success or failure will result. If it is a number other than 0, that many total missing + extra items will be tolerated, if it is not a number or the string "auto", it must be a function which will be given the reference value being compared to and which should return a number of different items to be tolerated.
If the order of the given sequence matters, set order_matters to True (only apply this to sets and dictionaries if you expect them to be ordered).
A custom item_matcher may be provided, in which case it will be applied to pairs of items from the two lists, tuples, or sets being compared to decide which are equal. Normally, any truthy value will count as a match, but if the return value from the item_matcher is a dictionary which contains a 'status' key, the match will only be counted if the status value is 'accomplished', which allows the use of checker-style functions as item matchers.
TODO: Would be nice to support partial matching at the item level, but that's hard!
When an item_matcher is provided for comparing dictionaries, it will be applied to key/value pairs (i.e., it will be given two arguments which are both tuples, not four arguments). Sometimes the values will be None during the first phase when only keys are being compared.
A custom item_repr function may be provided to be used to generate item representations in explanations where lists of missing/extra/moved items appear. A custom key_repr may also be provided for representing dictionary keys; if left out it defaults to the same thing as item_repr (which itself defaults to `html_tools.dynamic_html_repr`).
Both of these custom repr functions should be able to accept one argument: the thing to be represented.
If attempts to compare or list full results are not desired, include_full_results may be given as False.
1889def diff_anim_frames(val, ref, steps=10): 1890 """ 1891 Creates and returns a list of PIL.Image objects representing 1892 animation frames that fade between the two given images. A custom 1893 number of fade steps may be provided, which specifies the number of 1894 intermediate frames, so the result will have that many frames plus 1895 two. The fade is linear. 1896 """ 1897 import PIL.Image # we import here and let a potential error bubble out 1898 # TODO: Draw label text on anim frames! 1899 1900 result = [val] 1901 for step in range(1, steps + 1): 1902 t = step / (steps + 1) 1903 result.append(PIL.Image.blend(val, ref, t)) 1904 result.append(ref) 1905 return result
Creates and returns a list of PIL.Image objects representing animation frames that fade between the two given images. A custom number of fade steps may be provided, which specifies the number of intermediate frames, so the result will have that many frames plus two. The fade is linear.
def make_image_comparator(
    allowed_differences=0.03,
    partial_allowed=0.5,
    similarity_threshold=15
):
    """
    Returns a comparison function suitable for use with 'image' context
    slots. It checks pixels in the two images (which it checks are
    actually images and are the same size) and returns a status of
    accomplished if the fraction of the area of the image in which pixels
    differ is less than or equal to the given allowed fraction. Pixels
    which have a 255-basis-RGB-space Euclidean distance (TODO: better
    distance metric) below the given similarity threshold are counted as
    1/2 of a pixel difference instead of a full pixel difference. (TODO:
    Better controls for this?). A partially-complete result is returned
    as long as the fractional area of different pixels is no more than
    the given partial_allowed value.
    """
    import PIL.Image  # we import here and let a potential error bubble out

    def compare_images(val, ref):
        """
        Compares two PILlow images, ensuring that the total percentage of
        pixels which differ between the two images is below a threshold.
        Pixels which differ only slightly will count as 1/2 a pixel in
        terms of the area difference computation. Images of different
        sizes will always be treated as totally different (TODO: More
        nuance there?).
        """
        # A bad reference image is a solution-side error, not a student
        # error, so it raises rather than returning a failed status.
        if not isinstance(ref, PIL.Image.Image):
            raise TypeError(
                f"Reference image capture failed (got a {type(ref)},"
                f" not a PIL.Image)"
            )
        if not isinstance(val, PIL.Image.Image):
            return {
                "status": "failed",
                "explanation": (
                    f"Image capture failed (got a {type(val)}, not an"
                    f" image)."
                )
            }

        if val.size != ref.size:
            return {
                "status": "failed",
                "explanation": (
                    f"Your image ({val.width}x{val.height}) did not"
                    f" have the same size as the solution image"
                    f" ({ref.width}x{ref.height})."
                )
            }

        # Should be 0-255 RGB data
        # TODO: Verify mode
        val_pixels = val.getdata()
        ref_pixels = ref.getdata()
        total_pixels = len(val_pixels)

        # Two equal-size zero-area images are trivially identical; this
        # guard also prevents a ZeroDivisionError in the fraction
        # computations below.
        if total_pixels == 0:
            return {
                "status": "accomplished",
                "explanation": (
                    "Your image and the solution image were exactly the"
                    " same (both were empty)."
                )
            }

        diff_area = 0       # count of clearly-different pixels
        half_diff_area = 0  # count of only-slightly-different pixels
        for px, ref_px in zip(val_pixels, ref_pixels):
            # Euclidean distance in 255-basis RGB space (first three
            # channels only; alpha, if present, is ignored).
            dist = sum((px[i] - ref_px[i])**2 for i in range(3))**0.5
            if 0 < dist <= similarity_threshold:
                half_diff_area += 1
            elif similarity_threshold < dist:
                diff_area += 1

        # Build an HTML image comparison
        comparison = html_tools.build_html_tabs(
            [
                # TODO: Proper alt text here!
                (
                    "Your image:",
                    html_tools.html_image(val, "Your image")
                ),
                (
                    "Solution image:",
                    html_tools.html_image(ref, "Solution image")
                ),
                (
                    "Animation:",
                    html_tools.html_animation(
                        diff_anim_frames(val, ref, 10),
                        (
                            "An animation between your image and the"
                            " solution image."
                        ),
                        delays=[500] + [100] * 10 + [500]
                    )
                )
            ]
        )

        # Return a status based on the difference fraction; slightly
        # different pixels count for half their area.
        diff_fraction = diff_area / total_pixels
        half_diff_fraction = half_diff_area / total_pixels
        diff_score = diff_fraction + half_diff_fraction / 2

        diff_pct = round_to_sig_figs(diff_fraction * 100, 2)
        diff_msg = (
            f"{diff_pct}% of pixels were"
            f" different"
        )
        if half_diff_fraction > 0:
            half_pct = round_to_sig_figs(half_diff_fraction * 100, 2)
            diff_msg += (
                f" and {half_pct}% of pixels were"
                f" somewhat different"
            )

        if diff_score == 0:
            return {
                "status": "accomplished",
                "explanation": (
                    f"Your image and the solution image were exactly the"
                    f" same: {comparison}"
                )
            }
        elif diff_score <= allowed_differences:
            return {
                "status": "accomplished",
                "explanation": (
                    f"Your image and the solution image were almost the"
                    f" same ({diff_msg}): {comparison}"
                )
            }
        elif diff_score <= partial_allowed:
            return {
                "status": "partial",
                "explanation": (
                    f"Your image and the solution image were similar,"
                    f" but not the same ({diff_msg}): {comparison}"
                )
            }
        else:
            return {
                "status": "failed",
                "explanation": (
                    f"Your image and the solution image were not the"
                    f" same ({diff_msg}): {comparison}"
                )
            }

    return compare_images
Returns a comparison function suitable for use with 'image' context slots. It checks pixels in the two images (which it checks are actually images and are the same size) and returns a status of accomplished if the fraction of the area of the image in which pixels differ is less than or equal to the given allowed fraction. Pixels which have a 255-basis-RGB-space Euclidean distance (TODO: better distance metric) below the given similarity threshold are counted as 1/2 of a pixel difference instead of a full pixel difference. (TODO: Better controls for this?). A partially-complete result is returned as long as the fractional area of different pixels is no more than the given partial_allowed value.
Threshold in terms of newlines before we treat a string as long.
Threshold in terms of raw characters even without any newlines before we treat a string as long.
def omni_compare(val, ref, memo=None):
    """
    A one-size-kinda-fits-all comparison method (although note that it
    assumes order matters in lists and tuples).

    Tries its best to give a concise-ish description of differences when
    they exist.
    """
    if memo is None:
        memo = {}

    # memo maps id(val) -> set of id(ref) values currently being
    # compared against it, so that recursive structures terminate.
    vid = id(val)
    rid = id(ref)
    if vid in memo and rid in memo[vid]:
        return True
        # Recursive comparison that's already in-progress

    memo.setdefault(vid, set()).add(rid)

    # TODO: Better/safer here!
    try:
        matched = val == ref
    except RecursionError:
        matched = False  # might be, but we'll have to check further

    if matched:
        return {
            "status": "accomplished",
            "explanation": (
                f"Actual value is the same as the reference"
                f" value.<br>\n{html_tools.dynamic_html_repr(val)}"
            )
        }
    else:  # let's hunt for differences
        if isinstance(val, bool) and isinstance(ref, bool):
            # Bools are isinstance of int so we need this case before the
            # one below
            return {
                "status": "failed",
                "explanation": (
                    f"Your result ({val}) was the opposite of the"
                    f" expected result ({ref})."
                )
            }
        elif (
            isinstance(val, (int, float, complex))
            and isinstance(ref, (int, float, complex))
        ):  # what if they're both numbers?
            ncomp = numbers_are_equalish(val, ref)
            if ncomp["status"] == "accomplished":  # close-enough numbers
                return ncomp
            else:  # not close-enough
                return {
                    "status": "failed",
                    "explanation": (
                        f"Your result ({val}) and the expected"
                        f" result ({ref}) are consequentially"
                        f" different numbers."
                    )
                }
        elif type(val) != type(ref):  # different types; not both numbers
            vt = html_tools.escape(str(type(val).__name__))
            rt = html_tools.escape(str(type(ref).__name__))
            return {
                "status": "failed",
                "explanation": (
                    f"Your result was {phrasing.a_an(vt)},"
                    f" but the correct result was"
                    f" {phrasing.a_an(rt)}."
                )
            }
        elif isinstance(val, str):  # both strings
            val_lines = val.splitlines()
            ref_lines = ref.splitlines()
            if (
                max(len(val_lines), len(ref_lines)) >= LONG_STRING_LINES
                or max(len(val), len(ref)) >= LONG_STRING_LEN
            ):  # longish strings so there should be some tolerance
                return very_fuzzy_string_compare(
                    val,
                    ref,
                    line_match_threshold=1.0,
                    sequence_match_threshold=1.0,
                    partial_sequence_threshold=0.5
                )  # partial success only unless equal
            else:  # shorter strings so very little tolerance
                close = strings_are_equal_modulo_most_whitespace(val, ref)
                diff_table = html_tools.html_diff_table(
                    val_lines,
                    ref_lines,
                    "Actual output",
                    "Corresponding lines (if any) in expected output"
                )
                if close["status"] == "accomplished":
                    return {
                        "status": "partial",
                        "explanation": (
                            f"Strings aren't the same, but they're"
                            f" close:<br>\n{diff_table}"
                        )
                    }
                else:
                    return {
                        "status": "failed",
                        "explanation": (
                            f"Actual value is different from the"
                            f" reference value:<br>\n{diff_table}"
                        )
                    }

        elif isinstance(val, (list, tuple)):  # same-type sequences
            # TODO: Less inefficient here!
            # TODO: More detailed explication of deep differences here?
            return make_structure_comparator(
                order_matters=True,
                item_matcher=lambda val, ref: omni_compare(val, ref, memo)
            )(val, ref)
        elif isinstance(val, (set, dict)):  # both sets or dicts
            # TODO: Less inefficient here!
            # TODO: More detailed explication of deep differences here?
            # Sets and dictionaries are unordered, so unlike the
            # list/tuple case above, order must NOT matter here (was
            # incorrectly True, copy-pasted from the branch above).
            return make_structure_comparator(
                order_matters=False,
                item_matcher=lambda val, ref: omni_compare(val, ref, memo)
            )(val, ref)
        else:  # not sure what kind of thing this is...
            return {
                "status": "failed",
                "explanation": (
                    f"Your value ({html_tools.dynamic_html_repr(val)})"
                    f" and the reference value"
                    f" ({html_tools.dynamic_html_repr(ref)}) are not"
                    f" the same."
                )
            }
A one-size-kinda-fits-all comparison method (although note that it assumes order matters in lists and tuples).
Tries its best to give a concise-ish description of differences when they exist.