potluck.meta

Routines for checking whether specifications are correctly implemented and working as intended.

meta.py

  1"""
  2Routines for checking whether specifications are correctly implemented
  3and working as intended.
  4
  5meta.py
  6"""
  7
  8import re
  9
 10from . import file_utils
 11
 12
 13EXPECTATIONS = {}
 14"""
 15Global storage for expectations by spec module name, mode, and username.
 16Entries are module names with mode dictionaries as values, whose keys are
 17modes (i.e., "evaluation" or "validation") and whose values are
 18dictionaries that map usernames to lists of expectations.
 19"""
 20
 21CURRENT_EXAMPLE = None
 22"""
 23Which username are expectations automatically registered for?
 24"""
 25
 26
def simplify(description):
    """
    Normalizes case and removes HTML tags from the given goal
    description, for use in expectation matching. Note that angle
    brackets which aren't used for HTML tags are assumed to already be
    escaped. Adds '^^^' at the start and '$$$' at the end so that rules
    can use those anchors (or part of them) for disambiguation.
    """
    stripped = re.sub(r"<[^>]*>", '', description)
    return '^^^' + stripped.casefold() + '$$$'


def all_row_trails(report_or_row, trail_prefix=None):
    """
    Visits each row & sub-row of a report (or a report row) one by one.
    For each row visited, it yields a tuple containing that row, followed
    by a trail: a list of the first description entry of each ancestor of
    that row, starting from the top-level ancestor and going down to and
    including that row itself.

    If provided, the given trail prefix (a list of strings) will be
    included before the start of each trail.
    """
    if trail_prefix is None:
        trail_prefix = []

    if "table" in report_or_row:
        for entry in report_or_row["table"]:
            yield from all_row_trails(entry, trail_prefix)
    else:
        desc = report_or_row["description"][0]
        below = trail_prefix + [desc]
        if "subtable" in report_or_row:
            yield (report_or_row, below)
            for entry in report_or_row["subtable"]:
                yield from all_row_trails(entry, below)
        else:
            yield (report_or_row, below)


class ExpectedWarning:
    """
    An expected warning provides a heads-up that a warning containing
    certain text is expected, so that getting such a warning won't fail a
    check.
    """
    def __init__(self, message_fragment=''):
        """
        A message fragment should be provided; if it is omitted, all
        warnings will be ignored. If a fragment is provided, all warnings
        which include that fragment as part of their raw HTML code string
        will be ignored, but other warnings will not be.
        """
        self.fragment = message_fragment

    def unexpected(self, warnings):
        """
        Returns all of the warnings from the given list which *aren't*
        expected, given this expectation that certain warning(s) might be
        present.
        """
        return [w for w in warnings if self.fragment not in w]


class Expectation:
    """
    An expectation establishes that a specific goal should evaluate to a
    specific result within a report. These expectations can be tested
    to make sure that a specification is working as designed.

    To specify which goal the expectation applies to, there are two
    options: you can provide a fragment of the goal's identifier as a
    string (it must match exactly one goal; see
    `potluck.rubrics.Rubric.goals_by_id`), or you can provide a list of
    strings, each of which must uniquely match against an item in a
    report table at a specific level, with the next string matching
    against that row's sub-table, and so on. These matches are performed
    in a case-insensitive manner with HTML tags stripped out, against the
    primary obfuscated description entry for each goal/category. The
    specified string only has to match part of the goal description, but
    it must not match multiple goal descriptions at a given table level.
    The characters '^^^' are added to the beginning of the rubric string,
    and '$$$' to the end, to aid in disambiguation.

    Because of these matching rules, for a rubric where the standard
    metric `potluck.rubrics.core_extras_categorized_metric` is used,
    goal paths are usually straightforward to construct when default
    descriptions are in place. Some examples of both id-fragment and
    goal-path methods:

    - For a core FunctionDef Check for function 'foo':
        `"core.check:def-foo$"` OR
        `[ "procedure", "core", "define foo" ]`

    - For an extra FunctionCall Check for 'bar' as a sub-rule of the
        check above:
        `"core.check:def-foo:call-bar$"` OR
        `[ "procedure", "extra", "define foo", "call bar" ]`

    - For a core trace test of function 'foo', assuming it was created
        with group_name "trace":
        `"core.test:foo:trace"` OR
        `[ "process", "core", "the foo function must" ]`
        (note that one could also use
        `"^goal:core.test:foo:trace$"` OR
        `[ "process", "core", "^the foo function must" ]`)

    - For a core result value test of function 'foo' (with no group_name):
        `"core.test:foo$"` OR
        `[ "product", "core", "foo returns" ]`
        (Note for the ID version, the $ is important to distinguish from
        the case above.)

    - For a core printed output test of function 'foo' (with group_name
        "output"):
        `"core.test:foo:output"` OR
        `[ "behavior", "core", "foo prints" ]`
    """
    def __init__(self, goal_spec, expected_status):
        """
        The goal_spec is a list of strings specifying how to find the
        goal in a report (strings are matched against descriptions to
        find sub-tables). Alternatively, the goal_spec may be a single
        string, which must match a single goal in the rubric using
        the same rules as `potluck.rubrics.Rubric.goals_by_id`. The
        expected evaluation result is also required, which should be one
        of the strings used for goal statuses (see
        `potluck.rubrics.Goal`).

        Note that the precise goal_spec list an `Expectation` should have
        depends on the metric used and the details of how a
        `potluck.rubrics.Rubric` object formulates its overall report,
        because any top-level organizational report rows (e.g. for goal
        types or categories) need to be accounted for. Specifying an
        identifier fragment doesn't depend on the metric, but requires
        understanding how identifiers are built up, and in some cases,
        automatic deduplication of goal identifiers must be accounted
        for.

        For matching using a goal spec that's a list of strings, the
        case-folded version of each goal_spec entry is checked using 'in'
        against a case-folded version of each rubric entry at the
        relevant level. Exactly 1 rubric entry must match. The rubric
        entries also have HTML tags stripped out, and have '^^^' added at
        the front and '$$$' at the end to aid in disambiguation.

        For example, if there are rubric entries named "Bug #1" and
        "Bug #11", an expectation for the "Bug #1" rubric entry could use
        "bug #1$" as its goal_spec entry.
        """
        self.goal_spec = goal_spec
        self.expected_status = expected_status

    def check(self, report):
        """
        Checks whether this expectation is fulfilled in a given report.
        Returns a tuple containing:
            1. Either True or False indicating success or failure.
            2. A string description of why the check failed (or how it
               succeeded).
            3. A list of strings containing the full unmodified
               initial descriptions of each report table row on the path
               to the row that was checked. If the check failed because
               it could not find the row it was looking for, this will be
               None.
        """
        rows_here = report["table"]
        if rows_here == [] and report["files"] == []:
            raise ValueError("Report indicates no file to evaluate.")
        found = None
        trail = []

        if isinstance(self.goal_spec, str):
            candidates = []
            all_ids = []
            for (row, trail) in all_row_trails(report):
                if 'id' in row:
                    all_ids.append(row['id'])
                    if self.goal_spec in ('^^^' + row['id'] + '$$$'):
                        candidates.append((row, trail))

            if (
                len(all_ids) == 0
                and (
                    report["summary"]
                    == "You did not submit any code for this task."
                )
            ):
                return (
                    False,
                    "There was no submission.",
                    None
                )

            if len(candidates) == 0:
                options = '\n'.join(
                    '#' + ident
                    for ident in all_ids
                )
                return (
                    False,
                    (
                        f"0 goals matched"
                        f" '#{self.goal_spec}'. Available goal ids"
                        f" are:\n{options}"
                    ),
                    None
                )
            elif len(candidates) > 1:
                options = '\n'.join(
                    '#' + row['id']
                    for (row, trail) in candidates
                )
                return (
                    False,
                    (
                        f"{len(candidates)} goals matched"
                        f" '#{self.goal_spec}'. Matching goals"
                        f" are:\n{options}"
                    ),
                    None
                )

            # We found one match:
            found, trail = candidates[0]

            # String for reporting where we are
            where = "In " + ' → '.join(trail)

        else:  # we assume it's a collection of strings
            # Match at each level of our goal path
            for match_key in self.goal_spec:
                # Match against descriptions at this level
                matches_here = []
                for row in rows_here:
                    match_against = simplify(row["description"][0])
                    look_for = match_key.casefold()
                    if look_for in match_against:
                        matches_here.append(row)

                # Check # of matching rows
                if len(matches_here) != 1:  # zero or multiple matches
                    if trail:
                        where = "In " + ' → '.join(trail)
                    else:
                        where = "At the top level of the report"

                    options = '\n'.join(
                        row['description'][0]
                        for row in rows_here
                    )
                    return (
                        False,
                        (
                            f"{where}, {len(matches_here)} goals matched"
                            f" '{match_key}'. Goals here are:\n{options}"
                        ),
                        None
                    )
                else:  # a single match, as required
                    # Record the goal or other table row we found:
                    found = matches_here[0]
                    # Extend our trail
                    trail.append(found["description"][0])
                    # Enter next level of the table:
                    rows_here = found["subtable"]

            # Strings for reporting our result
            where = "In " + ' → '.join(trail)

        # "found" should now be the matched goal's report row
        if found["status"] == self.expected_status:
            return (
                True,
                f"{where}, confirmed status '{self.expected_status}'.",
                trail
            )
        else:
            return (
                False,
                (
                    f"{where}, status '{found['status']}' did not match"
                    f" expected status '{self.expected_status}'."
                ),
                trail
            )


def check_entire_report(
    report,
    all_expectations,
    default_level=0,
    require_default="accomplished"
):
    """
    Given a report and a list of `Expectation` and/or `ExpectedWarning`
    objects, this function checks each of the expectations within the
    provided report, returning a tuple containing True or False to
    indicate success or failure, as well as a multi-line string
    explaining which checks failed or that all checks succeeded.

    If require_default is provided, then all rubric rows in the report
    which don't have an explicit `Expectation` provided for them or a
    sub-row at the given default_level must match the require_default
    status. Set require_default to None (the default is 'accomplished')
    to leave non-explicitly-checked rows unchecked.
    """
    explanation = "Some checks failed:\n"
    coverage = {}
    succeeded = True
    unexpected_warnings = report["warnings"]
    for exp in all_expectations:
        if isinstance(exp, ExpectedWarning):
            # Filter out warnings
            before = len(unexpected_warnings)
            unexpected_warnings = exp.unexpected(unexpected_warnings)
            if len(unexpected_warnings) == before:  # nothing was filtered
                succeeded = False
                explanation += (
                    f"  Expected at least one warning containing the text"
                    f" '{exp.fragment}', but no such warning was present."
                    f"\n  (or it was filtered by a different warning"
                    f" expectation.)\n"
                )

        elif isinstance(exp, Expectation):
            # Test goal status
            success, expl, path = exp.check(report)
            if path is None:
                if isinstance(exp.goal_spec, str):
                    gs = '#' + exp.goal_spec
                else:
                    gs = ' → '.join(exp.goal_spec)
                raise ValueError(
                    "Unable to find expected goal:\n{}\n{}".format(gs, expl)
                )
            c = coverage
            for entry in path:
                c = c.setdefault(entry, {})
            c[None] = True
            if not success:
                explanation += expl + '\n'
                succeeded = False

        else:
            raise TypeError(f"Invalid expectation type: {type(exp)}")

    default_count = 0
    if require_default is not None:
        def check_default_statuses(rows, covered, path):
            """
            Checks that the status of every row at a certain default
            level within the report hierarchy is equal to the required
            default status. Needs a list of rows at this level of the
            table, a dictionary of covered paths pertaining to this
            level of the table, and a list of strings indicating the
            path taken to get to this part of the table.

            Returns a tuple starting with True or False for success or
            failure, followed by a string describing the failure(s) or
            explaining the success.
            """
            nonlocal default_count, default_level, require_default
            passed = True
            explanation = ""
            level = len(path)
            if level == default_level:  # Check each non-covered row
                for row in rows:
                    desc = row["description"][0]
                    if desc in covered and covered[desc].get(None, False):
                        continue  # don't check this covered row
                    else:
                        default_count += 1
                        if row["status"] != require_default:
                            where = "In " + " → ".join(path + [desc])
                            explanation += (
                                f"{where}, status '{row['status']}' did"
                                f" not match required default status"
                                f" '{require_default}'.\n"
                            )
                            passed = False
            else:  # Recurse
                for row in rows:
                    desc = row["description"][0]
                    subtable = row["subtable"]
                    sub_success, sub_expl = check_default_statuses(
                        subtable,
                        covered.get(desc, {}),
                        path + [desc]
                    )
                    if not sub_success:
                        passed = False
                        explanation += sub_expl

            if passed:
                explanation = (
                    f"All non-expected statuses were"
                    f" '{require_default}'."
                )
            return passed, explanation

        default_success, default_expl = check_default_statuses(
            report["table"],
            coverage,
            path=[]
        )
        if not default_success:
            succeeded = False
            explanation += default_expl

    if succeeded:
        explanation = "All {} expectation(s){} were met.".format(
            len(all_expectations),
            (
                f" (plus {default_count} default expectation(s))"
                if default_count > 0
                else ""
            )
        )

    # Check for warnings and replace/augment explanation
    if len(unexpected_warnings) > 0:
        wmsg = (
            "The report included unexpected warnings:\n  "
          + "\n  ".join(unexpected_warnings)
        )
        if succeeded:
            explanation = wmsg
        else:
            explanation = wmsg + '\n' + explanation

        succeeded = False

    return (succeeded, explanation)


def example(username, extra_modes=()):
    """
    Registers a current username such that calls to `expect` and/or
    `expect_validation` create expectations for that example submission,
    and creates an "evaluation" entry in the expectations table for it so
    that even if no expectations are established, it will still be tested
    using default expectations.

    If extra_modes is provided, it should be a list of strings naming
    extra modes to check (e.g., ["validation"]). Note that as soon as an
    expectation is established for any mode, that mode will be checked
    even if it wasn't specified here.
    """
    global CURRENT_EXAMPLE
    CURRENT_EXAMPLE = username
    mname = file_utils.get_spec_module_name()
    for mode in ["evaluation"] + list(extra_modes):
        EXPECTATIONS\
            .setdefault(mname, {})\
            .setdefault(mode, {})\
            .setdefault(username, [])


def expect(status, *id_or_path, mode="evaluation"):
    """
    Creates an `Expectation` object and registers it under the current
    example username for the given mode (evaluation by default).

    Arguments are:

    - status: The expected status. See `potluck.rubrics.Goal`.
    - id_or_path: One or more additional strings specifying which goal we're
        targeting (see `Expectation`). May also be a single string that
        starts with '#' to specify the goal using its identifier instead
        of a rubric-description-path. If it's a single string, it should
        start with the goal type and then category when using the default
        rubric metric.
    - mode: Keyword-only; sets which mode of testing the expectation
        applies to. Valid modes are "evaluation" (the default) and
        "validation".
    """
    mname = file_utils.get_spec_module_name()

    if len(id_or_path) == 1 and id_or_path[0].startswith('#'):
        goal_spec = id_or_path[0][1:]
    else:
        goal_spec = id_or_path

    EXPECTATIONS\
        .setdefault(mname, {})\
        .setdefault(mode, {})\
        .setdefault(CURRENT_EXAMPLE, [])\
        .append(
            Expectation(
                goal_spec,
                status
            )
        )


def expect_validation(*args):
    """
    Establishes an expectation for the validation step. This is just a
    shortcut for calling `expect` with mode set to "validation".
    """
    expect(*args, mode="validation")


def expect_warnings(fragment='', mode="evaluation"):
    """
    Creates an `ExpectedWarning` object and registers it under the
    current example username. Registers for evaluation by default, but
    you can specify a different mode (e.g., "validation").

    If the `fragment` argument is omitted, all warnings will be treated
    as expected; if it is provided, only warnings whose raw HTML message
    string contains that fragment as a substring will be treated as
    expected.
    """
    mname = file_utils.get_spec_module_name()

    EXPECTATIONS\
        .setdefault(mname, {})\
        .setdefault(mode, {})\
        .setdefault(CURRENT_EXAMPLE, [])\
        .append(ExpectedWarning(fragment))


def get_expectations(spec_module, mode="evaluation"):
    """
    Returns all expectations for the given specification module and mode,
    as a dictionary mapping user IDs to expectation lists. Returns None
    if there are no expectations for the target mode or for the target
    module, and an empty expectation set hasn't been set up either.
    """
    return EXPECTATIONS.get(spec_module.__name__, {}).get(mode, None)
EXPECTATIONS = {}

Global storage for expectations by spec module name, mode, and username. Entries are module names with mode dictionaries as values, whose keys are modes (i.e., "evaluation" or "validation") and whose values are dictionaries that map usernames to lists of expectations.

CURRENT_EXAMPLE = None

Which username are expectations automatically registered for?

def simplify(description):

Normalizes case and removes HTML tags from the given goal description, for use in expectation matching. Note that angle brackets which aren't used for HTML tags are assumed to already be escaped. Adds '^^^' at the start and '$$$' at the end so that rules can use those anchors (or part of them) for disambiguation.
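
For instance, a quick sketch of what this normalization produces (the description string here is made up for illustration):

    from potluck import meta

    # HTML tags are stripped, case is folded, and anchors are added:
    meta.simplify("Define <code>processData</code>")
    # -> '^^^define processdata$$$'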

def all_row_trails(report_or_row, trail_prefix=None):

Visits each row & sub-row of a report (or a report row) one by one. For each row visited, it yields a tuple containing that row, followed by a trail: a list of the first description entry of each ancestor of that row, starting from the top-level ancestor and going down to and including that row itself.

If provided, the given trail prefix (a list of strings) will be included before the start of each trail.
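
As a rough illustration, consider this hypothetical miniature report (real potluck reports carry more fields; only "table", "description", and "subtable" matter here):

    from potluck.meta import all_row_trails

    report = {
        "table": [
            {
                "description": ["Procedure"],
                "subtable": [
                    {"description": ["Define foo"], "status": "accomplished"},
                ],
            },
        ],
    }

    for row, trail in all_row_trails(report):
        print(trail)
    # ['Procedure']
    # ['Procedure', 'Define foo']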

class ExpectedWarning:

An expected warning provides a heads-up that a warning containing certain text is expected, so that getting such a warning won't fail a check.

ExpectedWarning(message_fragment='')

A message fragment should be provided; if it is omitted, all warnings will be ignored. If a fragment is provided, all warnings which include that fragment as part of their raw HTML code string will be ignored, but other warnings will not be.

def unexpected(self, warnings):

Returns all of the warnings from the given list which aren't expected, given this expectation that certain warning(s) might be present.
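
For example, with made-up warning strings:

    from potluck.meta import ExpectedWarning

    exp = ExpectedWarning("deprecated")
    warnings = [
        "<p>Function <code>foo</code> is deprecated.</p>",
        "<p>Unexpected print call.</p>",
    ]
    exp.unexpected(warnings)
    # -> ['<p>Unexpected print call.</p>']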

class Expectation:

An expectation establishes that a specific goal should evaluate to a specific result within a report. These expectations can be tested to make sure that a specification is working as designed.

To specify which goal the expectation applies to, there are two options: you can provide a fragment of the goal's identifier as a string (it must match exactly one goal; see potluck.rubrics.Rubric.goals_by_id), or you can provide a list of strings, each of which must uniquely match against an item in a report table at a specific level, with the next string matching against that row's sub-table, and so on. These matches are performed in a case-insensitive manner with HTML tags stripped out, against the primary obfuscated description entry for each goal/category. The specified string only has to match part of the goal description, but it must not match multiple goal descriptions at a given table level. The characters '^^^' are added to the beginning of the rubric string, and '$$$' to the end, to aid in disambiguation.

Because of these matching rules, for a rubric where the standard metric potluck.rubrics.core_extras_categorized_metric is used, goal paths are usually straightforward to construct when default descriptions are in place. Some examples of both id-fragment and goal-path methods:

  • For a core FunctionDef Check for function 'foo': "core.check:def-foo$" OR [ "procedure", "core", "define foo" ]

  • For an extra FunctionCall Check for 'bar' as a sub-rule of the check above: "core.check:def-foo:call-bar$" OR [ "procedure", "extra", "define foo", "call bar" ]

  • For a core trace test of function 'foo', assuming it was created with group_name "trace": "core.test:foo:trace" OR [ "process", "core", "the foo function must" ] (note that one could also use "^goal:core.test:foo:trace$" OR [ "process", "core", "^the foo function must" ])

  • For a core result value test of function 'foo' (with no group_name): "core.test:foo$" OR [ "product", "core", "foo returns" ] (Note for the ID version, the $ is important to distinguish from the case above.)

  • For a core printed output test of function 'foo' (with group_name "output"): "core.test:foo:output" OR [ "behavior", "core", "foo prints" ]
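
Concretely, the two specification styles look like this when constructing `Expectation` objects directly (the goal names are illustrative; real ones depend on your rubric):

    from potluck.meta import Expectation

    # By identifier fragment:
    by_id = Expectation("core.check:def-foo$", "accomplished")

    # By a path of description fragments, one per table level:
    by_path = Expectation(
        ["procedure", "core", "define foo"],
        "accomplished"
    )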

Expectation(goal_spec, expected_status)

The goal_spec is a list of strings specifying how to find the goal in a report (strings are matched against descriptions to find sub-tables). Alternatively, the goal_spec may be a single string, which must match a single goal in the rubric using the same rules as potluck.rubrics.Rubric.goals_by_id. The expected evaluation result is also required, which should be one of the strings used for goal statuses (see potluck.rubrics.Goal).

Note that the precise goal_spec list an Expectation should have depends on the metric used and the details of how a potluck.rubrics.Rubric object formulates its overall report, because any top-level organizational report rows (e.g. for goal types or categories) need to be accounted for. Specifying an identifier fragment doesn't depend on the metric, but requires understanding how identifiers are built up, and in some cases, automatic deduplication of goal identifiers must be accounted for.

For matching using a goal spec that's a list of strings, the case-folded version of each goal_spec entry is checked using 'in' against a case-folded version of each rubric entry at the relevant level. Exactly 1 rubric entry must match. The rubric entries also have HTML tags stripped out, and have '^^^' added at the front and '$$$' at the end to aid in disambiguation.

For example, if there are rubric entries named "Bug #1" and "Bug #11", an expectation for the "Bug #1" rubric entry could use "bug #1$" as its goal_spec entry.
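
The anchors make that disambiguation work because of how `simplify` rewrites descriptions:

    from potluck.meta import simplify

    simplify("Bug #1")   # -> '^^^bug #1$$$'
    simplify("Bug #11")  # -> '^^^bug #11$$$'

    # 'bug #1$' is a substring of the first result but not the second:
    "bug #1$" in simplify("Bug #1")   # True
    "bug #1$" in simplify("Bug #11")  # False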

def check(self, report):

Checks whether this expectation is fulfilled in a given report. Returns a tuple containing:

  1. Either True or False indicating success or failure.
  2. A string description of why the check failed (or how it succeeded).
  3. A list of strings containing the full unmodified initial descriptions of each report table row on the path to the row that was checked. If the check failed because it could not find the row it was looking for, this will be None.
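
A rough usage sketch, reusing the hypothetical miniature report shape from the `all_row_trails` example (note that in this sketch every matched row, including the goal itself, carries a "subtable" entry, since path matching descends through it):

    from potluck.meta import Expectation

    report = {
        "files": ["submission.py"],
        "table": [
            {
                "description": ["Procedure"],
                "status": "accomplished",
                "subtable": [
                    {
                        "description": ["Define foo"],
                        "status": "accomplished",
                        "subtable": [],
                    },
                ],
            },
        ],
    }

    exp = Expectation(["procedure", "define foo"], "accomplished")
    success, explanation, trail = exp.check(report)
    # success -> True
    # explanation -> "In Procedure → Define foo, confirmed status 'accomplished'."
    # trail -> ['Procedure', 'Define foo']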

def check_entire_report(report, all_expectations, default_level=0, require_default='accomplished'):

Given a report and a list of Expectation and/or ExpectedWarning objects, this function checks each of the expectations within the provided report, returning a tuple containing True or False to indicate success or failure, as well as a multi-line string explaining which checks failed or that all checks succeeded.

If require_default is provided, then all rubric rows in the report which don't have an explicit Expectation provided for them or a sub-row at the given default_level must match the require_default status. Set require_default to None (the default is 'accomplished') to leave non-explicitly-checked rows unchecked.
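
A sketch of a full check using the same miniature report shape (here the top-level "Procedure" row sits at default_level 0, and since no expectation names it directly, its status must equal require_default):

    from potluck.meta import Expectation, check_entire_report

    report = {
        "warnings": [],
        "files": ["submission.py"],
        "table": [
            {
                "description": ["Procedure"],
                "status": "accomplished",
                "subtable": [
                    {
                        "description": ["Define foo"],
                        "status": "accomplished",
                        "subtable": [],
                    },
                ],
            },
        ],
    }

    success, explanation = check_entire_report(
        report,
        [Expectation(["procedure", "define foo"], "accomplished")]
    )
    # success -> True
    # explanation -> "All 1 expectation(s) (plus 1 default expectation(s)) were met."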

def example(username, extra_modes=()):

Registers a current username such that calls to expect and/or expect_validation create expectations for that example submission, and creates an "evaluation" entry in the expectations table for it so that even if no expectations are established, it will still be tested using default expectations.

If extra_modes is provided, it should be a list of strings naming extra modes to check (e.g., ["validation"]). Note that as soon as an expectation is established for any mode, that mode will be checked even if it wasn't specified here.
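
In a specification module, this might look like the following (the username is a hypothetical example-submission name):

    from potluck import meta

    # Register the 'perfect' example for evaluation, and also make sure
    # it gets checked in validation mode:
    meta.example("perfect", extra_modes=["validation"])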

def expect(status, *id_or_path, mode='evaluation'):

Creates an Expectation object and registers it under the current example username for the given mode (evaluation by default).

Arguments are:

  • status: The expected status. See potluck.rubrics.Goal.
  • id_or_path: One or more additional strings specifying which goal we're targeting (see Expectation). May also be a single string that starts with '#' to specify the goal using its identifier instead of a rubric-description-path. If it's a single string, it should start with the goal type and then category when using the default rubric metric.
  • mode: Keyword-only; sets which mode of testing the expectation applies to. Valid modes are "evaluation" (the default) and "validation".
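
Putting it together, a spec module might assert statuses like this (the goal ids and paths are illustrative; see `Expectation` for how they're matched):

    from potluck import meta

    meta.example("perfect")
    meta.expect("accomplished", "procedure", "core", "define foo")
    meta.expect("accomplished", "#core.check:def-foo$")

    meta.example("missing_def")
    meta.expect("failed", "procedure", "core", "define foo")
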
def expect_validation(*args):

Establishes an expectation for the validation step. This is just a shortcut for calling expect with mode set to "validation".

def expect_warnings(fragment='', mode='evaluation'):

Creates an ExpectedWarning object and registers it under the current example username. Registers for evaluation by default, but you can specify a different mode (e.g., "validation").

If the fragment argument is omitted, all warnings will be treated as expected; if it is provided, only warnings whose raw HTML message string contains that fragment as a substring will be treated as expected.
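
For instance (the fragment text is illustrative):

    from potluck import meta

    meta.example("noisy")
    # Tolerate any warning mentioning 'extra print':
    meta.expect_warnings("extra print")

    # Or tolerate all warnings during validation:
    meta.expect_warnings(mode="validation")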

def get_expectations(spec_module, mode='evaluation'):

Returns all expectations for the given specification module and mode, as a dictionary mapping user IDs to expectation lists. Returns None if there are no expectations for the target mode or for the target module, and an empty expectation set hasn't been set up either.
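
A consumer of the expectations table might use it like this (the spec module name is hypothetical):

    import importlib

    from potluck import meta

    spec = importlib.import_module("specs.task1.spec")
    per_user = meta.get_expectations(spec, mode="evaluation")
    if per_user is not None:
        for username, expectations in per_user.items():
            print(username, len(expectations))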