exploration.utils
- Authors: Peter Mawhorter
- Consulted:
- Date: 2023-12-29
- Purpose: Utility functions with no specific relevance to particular sub-systems.
"""
- Authors: Peter Mawhorter
- Consulted:
- Date: 2023-12-29
- Purpose: Utility functions with no specific relevance to particular
    sub-systems.

"""

from typing import Collection, Optional, Tuple

import random

#-------------------#
# Utility functions #
#-------------------#

RANDOM_NAME_SUFFIXES = False
"""
Causes `uniqueName` to use random suffixes instead of sequential ones,
which is more efficient when many name collisions are expected but which
makes things harder to test and debug. False by default.
"""


def uniqueName(base: str, existing: Collection) -> str:
    """
    Finds a unique name relative to a collection of existing names,
    using the given base name, plus a unique suffix if that base name is
    among the existing names. If the base name isn't among the existing
    names, just returns the base name. The suffix consists of a period
    followed by a number; numbers are tried in increasing order until
    an unused name is found. This does lead to poor performance in
    cases where many collisions are expected; you can set
    `RANDOM_NAME_SUFFIXES` to True to use a random suffix instead.

    Note that if the base name already has a numerical suffix, that
    suffix will be changed instead of adding another one (counting
    continues upwards from the existing suffix).

    Examples (with `RANDOM_NAME_SUFFIXES` left at False):

    >>> uniqueName('room', ['hall'])
    'room'
    >>> uniqueName('room', ['room'])
    'room.1'
    >>> uniqueName('room', ['room', 'room.1'])
    'room.2'
    >>> uniqueName('room.1', ['room.1', 'room.2'])
    'room.3'
    """
    # Short-circuit if we're already unique
    if base not in existing:
        return base

    # Ensure a numeric suffix. `isdecimal` (rather than `isdigit`) is
    # used because `int` below only accepts decimal characters;
    # `isdigit` also accepts things like superscript digits, which
    # would make `int` raise a `ValueError`.
    _, dot, tail = base.rpartition('.')
    if not dot or not tail.isdecimal():
        base += '.1'

    # Everything up to and including the final '.'
    prefix = base[:base.rindex('.') + 1]

    if RANDOM_NAME_SUFFIXES:
        while base in existing:
            base = prefix + str(random.randint(0, 1000000))
    else:
        suffix = int(base[len(prefix):])
        while base in existing:
            suffix += 1
            base = prefix + str(suffix)

    return base


ABBR_SYMBOLS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"""
The list of symbols to use, in order, for abbreviations, adding
secondary symbols when the initial list runs out. It's stored as a
string, since each item is just one letter.
"""


def nextAbbrKey(currentKey: Optional[str]) -> str:
    """
    Given an abbreviation key, returns the next abbreviation key after
    that. Abbreviation keys are constructed using the `ABBR_SYMBOLS` as
    a base. If the argument is `None` (or an empty string), the first of
    the `ABBR_SYMBOLS` will be returned. Raises a `ValueError` if the
    key contains a character that is not one of the `ABBR_SYMBOLS`.
    For example:

    >>> nextAbbrKey(None)
    'A'
    >>> nextAbbrKey('')
    'A'
    >>> nextAbbrKey('A')
    'B'
    >>> nextAbbrKey('P')
    'Q'
    >>> nextAbbrKey('Z')
    'AA'
    >>> nextAbbrKey('AZ')
    'BA'
    >>> nextAbbrKey('BM')
    'BN'
    >>> nextAbbrKey('ZZ')
    'AAA'
    >>> nextAbbrKey('ZZZZ')
    'AAAAA'
    """
    if not currentKey:
        return ABBR_SYMBOLS[0]

    limit = len(ABBR_SYMBOLS)
    digits = [ABBR_SYMBOLS.index(c) for c in currentKey]

    # Increment the last digit, carrying leftwards while digits overflow
    position = len(digits) - 1
    while position >= 0:
        digits[position] += 1
        if digits[position] < limit:
            break
        digits[position] = 0
        position -= 1
    else:
        # Carried past the first digit: the key grows by one symbol
        digits.insert(0, 0)

    return ''.join(ABBR_SYMBOLS[d] for d in digits)


def abbr(string: str, length: int = 4) -> str:
    """
    Returns an abbreviated version of the given string, using at most
    the given number of characters. Creates two alternatives: a
    version without non-alphanumerics, and a version without
    non-alphanumerics or vowels (except an initial vowel, meaning a
    vowel which is the very first character of the string). If the
    entire string fits in the given length, it just returns that. If
    not, and the version with just alphanumerics fits in the given
    length, or the version without vowels is shorter than necessary,
    returns the version with just alphanumerics, up to the given
    length. Otherwise, returns the alphanumeric version without
    non-initial vowels.
    Examples:

    >>> abbr('abc')
    'abc'
    >>> abbr('abcdefgh')
    'abcd'
    >>> abbr('aeiou')
    'aeio'
    >>> abbr('axyzeiou')
    'axyz'
    >>> abbr('aeiouxyz')
    'axyz'
    >>> abbr('AEIOUXYZ')
    'AXYZ'
    >>> abbr('-hi-')  # fits
    '-hi-'
    >>> abbr('--hi--')  # doesn't fit
    'hi'
    >>> abbr('A to wa')
    'Atow'
    >>> abbr('A to wor')
    'Atwr'
    """
    # The whole string is the least-restricted option
    if len(string) <= length:
        return string

    alnums = []  # every alphanumeric character seen so far
    noVowels = []  # same, minus vowels (a vowel at index 0 is kept)
    for index, c in enumerate(string):
        # Stop scanning once the vowel-free version is long enough
        if len(noVowels) >= length:
            break
        if not c.isalnum():
            continue
        alnums.append(c)
        if index == 0 or c.lower() not in 'aeiou':
            noVowels.append(c)

    # Use ~ least restricted result that's short enough
    if len(alnums) <= length or len(noVowels) < length:
        return ''.join(alnums[:length])
    else:
        return ''.join(noVowels)


# Translation table used by `quoted`: adds a backslash before each
# double quote and each backslash. Built once instead of on every call.
_QUOTE_ESCAPES = str.maketrans({'"': '\\"', '\\': '\\\\'})


def quoted(string: str) -> str:
    """
    Returns a string that starts and ends with double quotes, which will
    evaluate to the given string using `eval`. Adds a layer of
    backslashes before any backslashes and/or double quotes in the
    original string. Different from `repr` because it always uses double
    quotes. Raises a `ValueError` if given a multi-line string because
    multi-line strings cannot be properly quoted using just a single
    pair of double quotes. Note that only the `'\\n'` character is
    checked for when deciding whether a string is multi-line.

    >>> quoted('1\\n2')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> quoted('12')
    '"12"'
    >>> quoted('back\\\\slash')
    '"back\\\\\\\\slash"'
    >>> quoted('"Yes!" she said, "it\\'s finished."')
    '"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished.\\\\""'
    """
    if '\n' in string:
        raise ValueError("Cannot quote a multi-line string.")

    return '"' + string.translate(_QUOTE_ESCAPES) + '"'


def unquoted(startsQuoted: str) -> Tuple[str, str]:
    """
    Inverse of `quoted`: takes a string starting with a double quote,
    and returns the string which got quoted to become that (plus the
    leftovers after the quoted region). Parses out where the quotes end
    automatically and returns as leftovers any extra part of the
    string beyond that. Removes one layer of backslashes from
    everything inside the quoted region. Raises a `ValueError` if the
    string does not start with a double quote or if it does not contain
    a matching double quote eventually.

    For example:

    >>> unquoted('abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc"')
    ('abc', '')
    >>> unquoted('"a" = "b"')
    ('a', ' = "b"')
    >>> unquoted('"abc" extra')
    ('abc', ' extra')
    >>> unquoted('"abc" "extra"')
    ('abc', ' "extra"')
    >>> unquoted('"\\\\"abc\\\\""')
    ('"abc"', '')
    >>> unquoted('"back\\\\\\\\slash"')
    ('back\\\\slash', '')
    >>> unquoted('"O\\'Toole"')
    ("O'Toole", '')
    >>> unquoted('"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished!\\\\""')
    ('"Yes!" she said, "it\\'s finished!"', '')
    >>> quoted(unquoted('"\\'"')[0]) == '"\\'"'
    True
    >>> unquoted(quoted('"\\'"')) == ('"\\'"', '')
    True
    """
    if not startsQuoted.startswith('"'):
        raise ValueError(
            f"No double-quote at start of string: '{startsQuoted}'"
        )

    pieces = []
    escaped = False
    # `index` is the position within the full string (opening quote is 0)
    for index, c in enumerate(startsQuoted[1:], start=1):
        if escaped:
            # Character after a backslash is taken literally
            escaped = False
            pieces.append(c)
        elif c == '\\':
            escaped = True
        elif c == '"':
            # Unescaped quote ends the region; the rest is leftovers
            return ''.join(pieces), startsQuoted[index + 1:]
        else:
            pieces.append(c)

    raise ValueError(
        f"No matching double-quote to end string: {repr(startsQuoted)}"
    )
Causes uniqueName
to use random suffixes instead of sequential ones,
which is more efficient when many name collisions are expected but which
makes things harder to test and debug. False by default.
def uniqueName(base: str, existing: Collection) -> str:
    """
    Finds a unique name relative to a collection of existing names,
    using the given base name, plus a unique suffix if that base name is
    among the existing names. If the base name isn't among the existing
    names, just returns the base name. The suffix consists of a period
    followed by a number; numbers are tried in increasing order until
    an unused name is found. This does lead to poor performance in
    cases where many collisions are expected; you can set
    `RANDOM_NAME_SUFFIXES` to True to use a random suffix instead.

    Note that if the base name already has a numerical suffix, that
    suffix will be changed instead of adding another one (counting
    continues upwards from the existing suffix).

    Examples (with `RANDOM_NAME_SUFFIXES` left at False):

    >>> uniqueName('room', ['hall'])
    'room'
    >>> uniqueName('room', ['room'])
    'room.1'
    >>> uniqueName('room', ['room', 'room.1'])
    'room.2'
    >>> uniqueName('room.1', ['room.1', 'room.2'])
    'room.3'
    """
    # Short-circuit if we're already unique
    if base not in existing:
        return base

    # Ensure a numeric suffix. `isdecimal` (rather than `isdigit`) is
    # used because `int` below only accepts decimal characters;
    # `isdigit` also accepts things like superscript digits, which
    # would make `int` raise a `ValueError`.
    _, dot, tail = base.rpartition('.')
    if not dot or not tail.isdecimal():
        base += '.1'

    # Everything up to and including the final '.'
    prefix = base[:base.rindex('.') + 1]

    if RANDOM_NAME_SUFFIXES:
        while base in existing:
            base = prefix + str(random.randint(0, 1000000))
    else:
        suffix = int(base[len(prefix):])
        while base in existing:
            suffix += 1
            base = prefix + str(suffix)

    return base
Finds a unique name relative to a collection of existing names,
using the given base name, plus a unique suffix if that base name is
among the existing names. If the base name isn't among the existing
names, just returns the base name. The suffix consists of a period
followed by a number, and the lowest unused number is used every
time. This does lead to poor performance in cases where many
collisions are expected; you can set RANDOM_NAME_SUFFIXES
to True
to use a random suffix instead.
Note that if the base name already has a numerical suffix, that suffix will be changed instead of adding another one.
The list of symbols to use, in order, for abbreviations, adding secondary symbols when the initial list runs out. It's stored as a string, since each item is just one letter.
def nextAbbrKey(currentKey: Optional[str]) -> str:
    """
    Given an abbreviation key, returns the next abbreviation key after
    that. Abbreviation keys are constructed using the `ABBR_SYMBOLS` as
    a base. If the argument is `None` (or an empty string), the first of
    the `ABBR_SYMBOLS` will be returned. Raises a `ValueError` if the
    key contains a character that is not one of the `ABBR_SYMBOLS`.
    For example:

    >>> nextAbbrKey(None)
    'A'
    >>> nextAbbrKey('')
    'A'
    >>> nextAbbrKey('A')
    'B'
    >>> nextAbbrKey('P')
    'Q'
    >>> nextAbbrKey('Z')
    'AA'
    >>> nextAbbrKey('AZ')
    'BA'
    >>> nextAbbrKey('BM')
    'BN'
    >>> nextAbbrKey('ZZ')
    'AAA'
    >>> nextAbbrKey('ZZZZ')
    'AAAAA'
    """
    if not currentKey:
        return ABBR_SYMBOLS[0]

    limit = len(ABBR_SYMBOLS)
    digits = [ABBR_SYMBOLS.index(c) for c in currentKey]

    # Increment the last digit, carrying leftwards while digits overflow
    position = len(digits) - 1
    while position >= 0:
        digits[position] += 1
        if digits[position] < limit:
            break
        digits[position] = 0
        position -= 1
    else:
        # Carried past the first digit: the key grows by one symbol
        digits.insert(0, 0)

    return ''.join(ABBR_SYMBOLS[d] for d in digits)
Given an abbreviation key, returns the next abbreviation key after
that. Abbreviation keys are constructed using the ABBR_SYMBOLS
as
a base. If the argument is None
, the first of the ABBR_SYMBOLS
will be returned. For example:
>>> nextAbbrKey(None)
'A'
>>> nextAbbrKey('A')
'B'
>>> nextAbbrKey('P')
'Q'
>>> nextAbbrKey('Z')
'AA'
>>> nextAbbrKey('AZ')
'BA'
>>> nextAbbrKey('BM')
'BN'
>>> nextAbbrKey('ZZ')
'AAA'
>>> nextAbbrKey('ZZZZ')
'AAAAA'
def abbr(string: str, length: int = 4) -> str:
    """
    Returns an abbreviated version of the given string, using at most
    the given number of characters. Creates two alternatives: a
    version without non-alphanumerics, and a version without
    non-alphanumerics or vowels (except an initial vowel, meaning a
    vowel which is the very first character of the string). If the
    entire string fits in the given length, it just returns that. If
    not, and the version with just alphanumerics fits in the given
    length, or the version without vowels is shorter than necessary,
    returns the version with just alphanumerics, up to the given
    length. Otherwise, returns the alphanumeric version without
    non-initial vowels.
    Examples:

    >>> abbr('abc')
    'abc'
    >>> abbr('abcdefgh')
    'abcd'
    >>> abbr('aeiou')
    'aeio'
    >>> abbr('axyzeiou')
    'axyz'
    >>> abbr('aeiouxyz')
    'axyz'
    >>> abbr('AEIOUXYZ')
    'AXYZ'
    >>> abbr('-hi-')  # fits
    '-hi-'
    >>> abbr('--hi--')  # doesn't fit
    'hi'
    >>> abbr('A to wa')
    'Atow'
    >>> abbr('A to wor')
    'Atwr'
    """
    # The whole string is the least-restricted option
    if len(string) <= length:
        return string

    alnums = []  # every alphanumeric character seen so far
    noVowels = []  # same, minus vowels (a vowel at index 0 is kept)
    for index, c in enumerate(string):
        # Stop scanning once the vowel-free version is long enough
        if len(noVowels) >= length:
            break
        if not c.isalnum():
            continue
        alnums.append(c)
        if index == 0 or c.lower() not in 'aeiou':
            noVowels.append(c)

    # Use ~ least restricted result that's short enough
    if len(alnums) <= length or len(noVowels) < length:
        return ''.join(alnums[:length])
    else:
        return ''.join(noVowels)
Returns an abbreviated version of the given string, using at most the given number of characters. Creates two alternatives: a version without non-alphanumerics, and a version without non-alphanumerics or vowels (except an initial vowel). If the entire string fits in the given length, it just returns that. If not, and the version with just alphanumerics fits in the given length, or the version without vowels is shorter than necessary, returns the version with just alphanumerics, up to the given length. Otherwise, returns the alphanumeric version without non-initial vowels. Examples:
>>> abbr('abc')
'abc'
>>> abbr('abcdefgh')
'abcd'
>>> abbr('aeiou')
'aeio'
>>> abbr('axyzeiou')
'axyz'
>>> abbr('aeiouxyz')
'axyz'
>>> abbr('AEIOUXYZ')
'AXYZ'
>>> abbr('-hi-') # fits
'-hi-'
>>> abbr('--hi--') # doesn't fit
'hi'
>>> abbr('A to wa')
'Atow'
>>> abbr('A to wor')
'Atwr'
# Translation table used by `quoted`: adds a backslash before each
# double quote and each backslash. Built once instead of on every call.
_QUOTE_ESCAPES = str.maketrans({'"': '\\"', '\\': '\\\\'})


def quoted(string: str) -> str:
    """
    Returns a string that starts and ends with double quotes, which will
    evaluate to the given string using `eval`. Adds a layer of
    backslashes before any backslashes and/or double quotes in the
    original string. Different from `repr` because it always uses double
    quotes. Raises a `ValueError` if given a multi-line string because
    multi-line strings cannot be properly quoted using just a single
    pair of double quotes. Note that only the `'\\n'` character is
    checked for when deciding whether a string is multi-line.

    >>> quoted('1\\n2')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> quoted('12')
    '"12"'
    >>> quoted('back\\\\slash')
    '"back\\\\\\\\slash"'
    >>> quoted('"Yes!" she said, "it\\'s finished."')
    '"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished.\\\\""'
    """
    if '\n' in string:
        raise ValueError("Cannot quote a multi-line string.")

    return '"' + string.translate(_QUOTE_ESCAPES) + '"'
Returns a string that starts and ends with double quotes, which will
evaluate to the given string using eval
. Adds a layer of
backslashes before any backslashes and/or double quotes in the
original string. Different from repr
because it always uses double
quotes. Raises a ValueError
if given a multi-line string because
multi-line strings cannot be properly quoted using just a single
pair of double quotes.
>>> quoted('1\n2')
Traceback (most recent call last):
...
ValueError...
>>> quoted('12')
'"12"'
>>> quoted('back\\slash')
'"back\\\\slash"'
>>> quoted('"Yes!" she said, "it\'s finished."')
'"\\"Yes!\\" she said, \\"it\'s finished.\\""'
def unquoted(startsQuoted: str) -> Tuple[str, str]:
    """
    Inverse of `quoted`: takes a string starting with a double quote,
    and returns the string which got quoted to become that (plus the
    leftovers after the quoted region). Parses out where the quotes end
    automatically and returns as leftovers any extra part of the
    string beyond that. Removes one layer of backslashes from
    everything inside the quoted region. Raises a `ValueError` if the
    string does not start with a double quote or if it does not contain
    a matching double quote eventually.

    For example:

    >>> unquoted('abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc')
    Traceback (most recent call last):
    ...
    ValueError...
    >>> unquoted('"abc"')
    ('abc', '')
    >>> unquoted('"a" = "b"')
    ('a', ' = "b"')
    >>> unquoted('"abc" extra')
    ('abc', ' extra')
    >>> unquoted('"abc" "extra"')
    ('abc', ' "extra"')
    >>> unquoted('"\\\\"abc\\\\""')
    ('"abc"', '')
    >>> unquoted('"back\\\\\\\\slash"')
    ('back\\\\slash', '')
    >>> unquoted('"O\\'Toole"')
    ("O'Toole", '')
    >>> unquoted('"\\\\"Yes!\\\\" she said, \\\\"it\\'s finished!\\\\""')
    ('"Yes!" she said, "it\\'s finished!"', '')
    """
    if not startsQuoted.startswith('"'):
        raise ValueError(
            f"No double-quote at start of string: '{startsQuoted}'"
        )

    pieces = []
    escaped = False
    # `index` is the position within the full string (opening quote is 0)
    for index, c in enumerate(startsQuoted[1:], start=1):
        if escaped:
            # Character after a backslash is taken literally
            escaped = False
            pieces.append(c)
        elif c == '\\':
            escaped = True
        elif c == '"':
            # Unescaped quote ends the region; the rest is leftovers
            return ''.join(pieces), startsQuoted[index + 1:]
        else:
            pieces.append(c)

    raise ValueError(
        f"No matching double-quote to end string: {repr(startsQuoted)}"
    )
Inverse of quoted
: takes a string starting with a double quote,
and returns the string which got quoted to become that (plus the
leftovers after the quoted region). Parses out where the quotes end
automatically and accumulates as leftovers any extra part of the
string beyond that. Removes one layer of backslashes from
everything. Raises a ValueError
if the string does not start with
a double quote or if it does not contain a matching double quote
eventually.
For example:
>>> unquoted('abc')
Traceback (most recent call last):
...
ValueError...
>>> unquoted('"abc')
Traceback (most recent call last):
...
ValueError...
>>> unquoted('"abc"')
('abc', '')
>>> unquoted('"a" = "b"')
('a', ' = "b"')
>>> unquoted('"abc" extra')
('abc', ' extra')
>>> unquoted('"abc" "extra"')
('abc', ' "extra"')
>>> unquoted('"\\"abc\\""')
('"abc"', '')
>>> unquoted('"back\\\\slash"')
('back\\slash', '')
>>> unquoted('"O\'Toole"')
("O'Toole", '')
>>> unquoted('"\\"Yes!\\" she said, \\"it\'s finished!\\""')
('"Yes!" she said, "it\'s finished!"', '')
>>> quoted(unquoted('"\'"')[0]) == '"\'"'
True
>>> unquoted(quoted('"\'"')) == ('"\'"', '')
True