exploration.utils

Authors: Peter Mawhorter
Consulted:
Date: 2023-12-29
Purpose: Utility functions with no specific relevance to particular sub-systems.

View Source

  1"""
  2- Authors: Peter Mawhorter
  3- Consulted:
  4- Date: 2023-12-29
  5- Purpose: Utility functions with no specific relevance to particular
  6    sub-systems.
  7
  8"""
  9
 10from typing import Collection, Optional, Tuple
 11
 12import random
 13
 14#-------------------#
 15# Utility functions #
 16#-------------------#
 17
 18RANDOM_NAME_SUFFIXES = False
 19"""
 20Causes `uniqueName` to use random suffixes instead of sequential ones,
 21which is more efficient when many name collisions are expected but which
 22makes things harder to test and debug. False by default.
 23"""
 24
 25
 26def uniqueName(base: str, existing: Collection) -> str:
 27    """
 28    Finds a unique name relative to a collection of existing names,
 29    using the given base name, plus a unique suffix if that base name is
 30    among the existing names. If the base name isn't among the existing
 31    names, just returns the base name. The suffix consists of a period
 32    followed by a number, and the lowest unused number is used every
 33    time. This does lead to poor performance in cases where many
 34    collisions are expected; you can set `RANDOM_NAME_SUFFIXES` to True
 35    to use a random suffix instead.
 36
 37    Note that if the base name already has a numerical suffix, that
 38    suffix will be changed instead of adding another one.
 39    """
 40    # Short-circuit if we're already unique
 41    if base not in existing:
 42        return base
 43
 44    # Ensure a digit suffix
 45    if (
 46        '.' not in base
 47     or not base.split('.')[-1].isdigit()
 48    ):
 49        base += '.1'
 50
 51    # Find the split point for the suffix
 52    # This will be the index after the '.'
 53    splitPoint = len(base) - list(reversed(base)).index('.')
 54    if not RANDOM_NAME_SUFFIXES:
 55        suffix = int(base[splitPoint:])
 56
 57    while base in existing:
 58        if RANDOM_NAME_SUFFIXES:
 59            base = base[:splitPoint] + str(random.randint(0, 1000000))
 60        else:
 61            suffix += 1
 62            base = base[:splitPoint] + str(suffix)
 63
 64    return base
 65
 66
 67ABBR_SYMBOLS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 68"""
 69The list of symbols to use, in order, for abbreviations, adding
 70secondary symbols when the initial list runs out. It's stored as a
 71string, since each item is just one letter.
 72"""
 73
 74
 75def nextAbbrKey(currentKey: Optional[str]) -> str:
 76    """
 77    Given an abbreviation keys, returns the next abbreviation key after
 78    that. Abbreviation keys are constructed using the `ABBR_SYMBOLS` as
 79    a base. If the argument is `None`, the first of the `ABBR_SYMBOLS`
 80    will be returned. For example:
 81
 82    >>> nextAbbrKey(None)
 83    'A'
 84    >>> nextAbbrKey('A')
 85    'B'
 86    >>> nextAbbrKey('P')
 87    'Q'
 88    >>> nextAbbrKey('Z')
 89    'AA'
 90    >>> nextAbbrKey('AZ')
 91    'BA'
 92    >>> nextAbbrKey('BM')
 93    'BN'
 94    >>> nextAbbrKey('ZZ')
 95    'AAA'
 96    >>> nextAbbrKey('ZZZZ')
 97    'AAAAA'
 98    """
 99    if currentKey is None:
100        return ABBR_SYMBOLS[0]
101    else:
102        digits = [ABBR_SYMBOLS.index(c) for c in currentKey]
103        limit = len(ABBR_SYMBOLS)
104        digits[-1] += 1
105        i = -1
106        while digits[i] >= limit:
107            digits[i] = 0
108            try:
109                digits[i - 1] += 1
110                i -= 1
111            except IndexError:  # Overflow into a non-existent digit
112                digits.insert(0, 0)
113                break
114        return ''.join(ABBR_SYMBOLS[d] for d in digits)
115
116
117def abbr(string: str, length: int = 4) -> str:
118    """
119    Returns an abbreviated version of the given string, using at most
120    the given number of characters. Creates two alternatives: a
121    version without non-alphanumerics, and a version without
122    non-alphanumerics or vowels (except an initial vowel). If the entire
123    string fits in the given length, it just returns that. If not, and
124    the version with just alphanumerics fits in the given length, or
125    the version without vowels is shorter than necessary, returns the
126    version with just alphanumerics, up to the given length. Otherwise,
127    returns the alphanumeric version without non-initial vowels.
128    Examples:
129
130    >>> abbr('abc')
131    'abc'
132    >>> abbr('abcdefgh')
133    'abcd'
134    >>> abbr('aeiou')
135    'aeio'
136    >>> abbr('axyzeiou')
137    'axyz'
138    >>> abbr('aeiouxyz')
139    'axyz'
140    >>> abbr('AEIOUXYZ')
141    'AXYZ'
142    >>> abbr('-hi-')  # fits
143    '-hi-'
144    >>> abbr('--hi--')  # doesn't fit
145    'hi'
146    >>> abbr('A to wa')
147    'Atow'
148    >>> abbr('A to wor')
149    'Atwr'
150    """
151    # Three results: all characters, all alphanumerics, and all
152    # non-vowel alphanumerics (up to the given length + initial vowel)
153    result1 = ''
154    result2 = ''
155    index = 0
156    while index < len(string) and len(result1) < length:
157        c = string[index]
158        if not c.isalnum():
159            pass
160        elif c.lower() in 'aeiou' and index > 0:
161            result2 += c
162        else:
163            result1 += c
164            result2 += c
165        index += 1
166
167    # Use ~ least restricted result that's short enough
168    if len(string) <= length:
169        return string
170    elif len(result2) <= length or len(result1) < length:
171        return result2[:length]
172    else:
173        return result1
174
175
def quoted(string: str) -> str:
    """
    Wraps the given string in double quotes, putting a backslash in
    front of every backslash and every double quote it contains, so
    that the result is a double-quoted string literal which `eval`
    turns back into the original. Unlike `repr`, the outer quotes are
    always double quotes. A string containing a newline character is
    rejected with a `ValueError`, since a multi-line string cannot be
    properly quoted using just a single pair of double quotes.

    >>> quoted('1\\n2')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> quoted('12')
    '"12"'
    >>> quoted('back\\\\slash')
    '"back\\\\\\\\slash"'
    >>> quoted('"Yes!" she said, "it\\'s finished."')
    '"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished.\\\\""'
    """
    if '\n' in string:
        raise ValueError("Cannot quote a multi-line string.")

    # Backslashes must be doubled first so that the backslashes added
    # in front of double quotes are not themselves doubled.
    protected = string.replace('\\', '\\\\').replace('"', '\\"')
    return f'"{protected}"'
201
202
def unquoted(startsQuoted: str) -> Tuple[str, str]:
    """
    Inverse of `quoted`: takes a string starting with a double quote,
    and returns the string which got quoted to become that (plus the
    leftovers after the quoted region). Parses out where the quotes end
    automatically and returns as leftovers any extra part of the
    string beyond that. Removes one layer of backslashes from
    everything inside the quoted region. Raises a `ValueError` if the
    string does not start with a double quote or if it does not contain
    a matching double quote eventually.

    For example:

    >>> unquoted('abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc"')
    ('abc', '')
    >>> unquoted('"a" = "b"')
    ('a', ' = "b"')
    >>> unquoted('"abc" extra')
    ('abc', ' extra')
    >>> unquoted('"abc" "extra"')
    ('abc', ' "extra"')
    >>> unquoted('"\\\\"abc\\\\""')
    ('"abc"', '')
    >>> unquoted('"back\\\\\\\\slash"')
    ('back\\\\slash', '')
    >>> unquoted('"O\\'Toole"')
    ("O'Toole", '')
    >>> unquoted('"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished!\\\\""')
    ('"Yes!" she said, "it\\'s finished!"', '')
    >>> quoted(unquoted('"\\'"')[0]) == '"\\'"'
    True
    >>> unquoted(quoted('"\\'"')) == ('"\\'"', '')
    True
    """
    if not startsQuoted.startswith('"'):
        raise ValueError(
            f"No double-quote at start of string: '{startsQuoted}'"
        )

    pieces = []
    escaped = False
    # Start at 1 to skip the opening quote
    for index, c in enumerate(startsQuoted[1:], start=1):
        if escaped:
            # Character after a backslash is taken literally
            escaped = False
            pieces.append(c)
        elif c == '\\':
            escaped = True
        elif c == '"':
            # Unescaped quote closes the region; the rest is leftovers
            return ''.join(pieces), startsQuoted[index + 1:]
        else:
            pieces.append(c)

    raise ValueError(
        f"No matching double-quote to end string: {repr(startsQuoted)}"
    )

RANDOM_NAME_SUFFIXES = False

Causes uniqueName to use random suffixes instead of sequential ones, which is more efficient when many name collisions are expected but which makes things harder to test and debug. False by default.

def uniqueName(base: str, existing: Collection) -> str: View Source

def uniqueName(base: str, existing: Collection) -> str:
    """
    Finds a unique name relative to a collection of existing names,
    using the given base name, plus a unique suffix if that base name is
    among the existing names. If the base name isn't among the existing
    names, just returns the base name. The suffix consists of a period
    followed by a number; candidates are tried in increasing order,
    counting up from the base name's own numerical suffix (or from 1 if
    it has none), and the first unused one is returned. This does lead
    to poor performance in cases where many collisions are expected; you
    can set `RANDOM_NAME_SUFFIXES` to True to use a random suffix
    instead.

    Note that if the base name already has a numerical suffix, that
    suffix will be changed instead of adding another one. Only suffixes
    made purely of decimal digits count as numerical; anything else
    (including characters like superscript digits, which `int` cannot
    parse) is treated as part of the name and gets '.1' appended.
    """
    # Short-circuit if we're already unique
    if base not in existing:
        return base

    # Ensure a suffix that `int` can parse. `isdecimal` is used rather
    # than `isdigit` because `isdigit` also accepts characters such as
    # superscript digits, which would make the `int` call below fail.
    _, dot, tail = base.rpartition('.')
    if not (dot and tail.isdecimal()):
        base += '.1'

    # Everything up to and including the last '.' is kept verbatim
    splitPoint = base.rindex('.') + 1
    prefix = base[:splitPoint]
    suffix = int(base[splitPoint:])

    while base in existing:
        if RANDOM_NAME_SUFFIXES:
            base = prefix + str(random.randint(0, 1000000))
        else:
            suffix += 1
            base = prefix + str(suffix)

    return base

Finds a unique name relative to a collection of existing names, using the given base name, plus a unique suffix if that base name is among the existing names. If the base name isn't among the existing names, just returns the base name. The suffix consists of a period followed by a number, and the lowest unused number is used every time. This does lead to poor performance in cases where many collisions are expected; you can set RANDOM_NAME_SUFFIXES to True to use a random suffix instead.

Note that if the base name already has a numerical suffix, that suffix will be changed instead of adding another one.

ABBR_SYMBOLS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

The list of symbols to use, in order, for abbreviations, adding secondary symbols when the initial list runs out. It's stored as a string, since each item is just one letter.

def nextAbbrKey(currentKey: Optional[str]) -> str: View Source

 76def nextAbbrKey(currentKey: Optional[str]) -> str:
 77    """
 78    Given an abbreviation keys, returns the next abbreviation key after
 79    that. Abbreviation keys are constructed using the `ABBR_SYMBOLS` as
 80    a base. If the argument is `None`, the first of the `ABBR_SYMBOLS`
 81    will be returned. For example:
 82
 83    >>> nextAbbrKey(None)
 84    'A'
 85    >>> nextAbbrKey('A')
 86    'B'
 87    >>> nextAbbrKey('P')
 88    'Q'
 89    >>> nextAbbrKey('Z')
 90    'AA'
 91    >>> nextAbbrKey('AZ')
 92    'BA'
 93    >>> nextAbbrKey('BM')
 94    'BN'
 95    >>> nextAbbrKey('ZZ')
 96    'AAA'
 97    >>> nextAbbrKey('ZZZZ')
 98    'AAAAA'
 99    """
100    if currentKey is None:
101        return ABBR_SYMBOLS[0]
102    else:
103        digits = [ABBR_SYMBOLS.index(c) for c in currentKey]
104        limit = len(ABBR_SYMBOLS)
105        digits[-1] += 1
106        i = -1
107        while digits[i] >= limit:
108            digits[i] = 0
109            try:
110                digits[i - 1] += 1
111                i -= 1
112            except IndexError:  # Overflow into a non-existent digit
113                digits.insert(0, 0)
114                break
115        return ''.join(ABBR_SYMBOLS[d] for d in digits)

Given an abbreviation key, returns the next abbreviation key after that. Abbreviation keys are constructed using the ABBR_SYMBOLS as a base. If the argument is None, the first of the ABBR_SYMBOLS will be returned. For example:

>>> nextAbbrKey(None)
'A'
>>> nextAbbrKey('A')
'B'
>>> nextAbbrKey('P')
'Q'
>>> nextAbbrKey('Z')
'AA'
>>> nextAbbrKey('AZ')
'BA'
>>> nextAbbrKey('BM')
'BN'
>>> nextAbbrKey('ZZ')
'AAA'
>>> nextAbbrKey('ZZZZ')
'AAAAA'

def abbr(string: str, length: int = 4) -> str: View Source

def abbr(string: str, length: int = 4) -> str:
    """
    Returns an abbreviated version of the given string, using at most
    the given number of characters. Creates two alternatives: a
    version without non-alphanumerics, and a version without
    non-alphanumerics or vowels (except an initial vowel, meaning a
    vowel that is the first alphanumeric character). If the entire
    string fits in the given length, it just returns that. If not, and
    the version with just alphanumerics fits in the given length, or
    the version without vowels is shorter than necessary, returns the
    version with just alphanumerics, up to the given length. Otherwise,
    returns the alphanumeric version without non-initial vowels.
    Examples:

    >>> abbr('abc')
    'abc'
    >>> abbr('abcdefgh')
    'abcd'
    >>> abbr('aeiou')
    'aeio'
    >>> abbr('axyzeiou')
    'axyz'
    >>> abbr('aeiouxyz')
    'axyz'
    >>> abbr('AEIOUXYZ')
    'AXYZ'
    >>> abbr('-hi-')  # fits
    '-hi-'
    >>> abbr('--hi--')  # doesn't fit
    'hi'
    >>> abbr('-aeiouxyz')  # leading vowel kept despite the dash
    'axyz'
    >>> abbr('A to wa')
    'Atow'
    >>> abbr('A to wor')
    'Atwr'
    """
    # The unmodified string is the least restricted result
    if len(string) <= length:
        return string

    # Two candidate abbreviations, built in one pass: all alphanumerics
    # seen so far, and the same minus non-initial vowels. Scanning stops
    # once the vowel-less candidate has reached the requested length.
    alnums = []
    vowelless = []
    for c in string:
        if len(vowelless) >= length:
            break
        if not c.isalnum():
            continue
        # A vowel is kept only when it is the first alphanumeric seen
        if c.lower() not in 'aeiou' or not alnums:
            vowelless.append(c)
        alnums.append(c)

    # Use ~ least restricted result that's short enough
    if len(alnums) <= length or len(vowelless) < length:
        return ''.join(alnums[:length])
    return ''.join(vowelless)

Returns an abbreviated version of the given string, using at most the given number of characters. Creates two alternatives: a version without non-alphanumerics, and a version without non-alphanumerics or vowels (except an initial vowel). If the entire string fits in the given length, it just returns that. If not, and the version with just alphanumerics fits in the given length, or the version without vowels is shorter than necessary, returns the version with just alphanumerics, up to the given length. Otherwise, returns the alphanumeric version without non-initial vowels. Examples:

>>> abbr('abc')
'abc'
>>> abbr('abcdefgh')
'abcd'
>>> abbr('aeiou')
'aeio'
>>> abbr('axyzeiou')
'axyz'
>>> abbr('aeiouxyz')
'axyz'
>>> abbr('AEIOUXYZ')
'AXYZ'
>>> abbr('-hi-')  # fits
'-hi-'
>>> abbr('--hi--')  # doesn't fit
'hi'
>>> abbr('A to wa')
'Atow'
>>> abbr('A to wor')
'Atwr'

def quoted(string: str) -> str: View Source

def quoted(string: str) -> str:
    """
    Wraps the given string in double quotes, putting a backslash in
    front of every backslash and every double quote it contains, so
    that the result is a double-quoted string literal which `eval`
    turns back into the original. Unlike `repr`, the outer quotes are
    always double quotes. A string containing a newline character is
    rejected with a `ValueError`, since a multi-line string cannot be
    properly quoted using just a single pair of double quotes.

    >>> quoted('1\\n2')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> quoted('12')
    '"12"'
    >>> quoted('back\\\\slash')
    '"back\\\\\\\\slash"'
    >>> quoted('"Yes!" she said, "it\\'s finished."')
    '"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished.\\\\""'
    """
    if '\n' in string:
        raise ValueError("Cannot quote a multi-line string.")

    # Backslashes must be doubled first so that the backslashes added
    # in front of double quotes are not themselves doubled.
    protected = string.replace('\\', '\\\\').replace('"', '\\"')
    return f'"{protected}"'

Returns a string that starts and ends with double quotes, which will evaluate to the given string using eval. Adds a layer of backslashes before any backslashes and/or double quotes in the original string. Different from repr because it always uses double quotes. Raises a ValueError if given a multi-line string because multi-line strings cannot be properly quoted using just a single pair of double quotes.

>>> quoted('1\n2')
Traceback (most recent call last):
...
ValueError...
>>> quoted('12')
'"12"'
>>> quoted('back\\slash')
'"back\\\\slash"'
>>> quoted('"Yes!" she said, "it\'s finished."')
'"\\"Yes!\\" she said, \\"it\'s finished.\\""'

def unquoted(startsQuoted: str) -> Tuple[str, str]: View Source

def unquoted(startsQuoted: str) -> Tuple[str, str]:
    """
    Inverse of `quoted`: takes a string starting with a double quote,
    and returns the string which got quoted to become that (plus the
    leftovers after the quoted region). Parses out where the quotes end
    automatically and returns as leftovers any extra part of the
    string beyond that. Removes one layer of backslashes from
    everything inside the quoted region. Raises a `ValueError` if the
    string does not start with a double quote or if it does not contain
    a matching double quote eventually.

    For example:

    >>> unquoted('abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc"')
    ('abc', '')
    >>> unquoted('"a" = "b"')
    ('a', ' = "b"')
    >>> unquoted('"abc" extra')
    ('abc', ' extra')
    >>> unquoted('"abc" "extra"')
    ('abc', ' "extra"')
    >>> unquoted('"\\\\"abc\\\\""')
    ('"abc"', '')
    >>> unquoted('"back\\\\\\\\slash"')
    ('back\\\\slash', '')
    >>> unquoted('"O\\'Toole"')
    ("O'Toole", '')
    >>> unquoted('"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished!\\\\""')
    ('"Yes!" she said, "it\\'s finished!"', '')
    >>> quoted(unquoted('"\\'"')[0]) == '"\\'"'
    True
    >>> unquoted(quoted('"\\'"')) == ('"\\'"', '')
    True
    """
    if not startsQuoted.startswith('"'):
        raise ValueError(
            f"No double-quote at start of string: '{startsQuoted}'"
        )

    pieces = []
    escaped = False
    # Start at 1 to skip the opening quote
    for index, c in enumerate(startsQuoted[1:], start=1):
        if escaped:
            # Character after a backslash is taken literally
            escaped = False
            pieces.append(c)
        elif c == '\\':
            escaped = True
        elif c == '"':
            # Unescaped quote closes the region; the rest is leftovers
            return ''.join(pieces), startsQuoted[index + 1:]
        else:
            pieces.append(c)

    raise ValueError(
        f"No matching double-quote to end string: {repr(startsQuoted)}"
    )

Inverse of quoted: takes a string starting with a double quote, and returns the string which got quoted to become that (plus the leftovers after the quoted region). Parses out where the quotes end automatically and accumulates as leftovers any extra part of the string beyond that. Removes one layer of backslashes from everything. Raises a ValueError if the string does not start with a double quote or if it does not contain a matching double quote eventually.

For example:

>>> unquoted('abc')
Traceback (most recent call last):
...
ValueError...
>>> unquoted('"abc')
Traceback (most recent call last):
...
ValueError...
>>> unquoted('"abc"')
('abc', '')
>>> unquoted('"a" = "b"')
('a', ' = "b"')
>>> unquoted('"abc" extra')
('abc', ' extra')
>>> unquoted('"abc" "extra"')
('abc', ' "extra"')
>>> unquoted('"\\"abc\\""')
('"abc"', '')
>>> unquoted('"back\\\\slash"')
('back\\slash', '')
>>> unquoted('"O\'Toole"')
("O'Toole", '')
>>> unquoted('"\\"Yes!\\" she said, \\"it\'s finished!\\""')
('"Yes!" she said, "it\'s finished!"', '')
>>> quoted(unquoted('"\'"')[0]) == '"\'"'
True
>>> unquoted(quoted('"\'"')) == ('"\'"', '')
True