Source code for brian2.utils.stringtools

"""
A collection of tools for string formatting tasks.
"""

import re
import string

__all__ = [
    "indent",
    "deindent",
    "word_substitute",
    "replace",
    "get_identifiers",
    "strip_empty_lines",
    "stripped_deindented_lines",
    "strip_empty_leading_and_trailing_lines",
    "code_representation",
    "SpellChecker",
]


[docs] def indent(text, numtabs=1, spacespertab=4, tab=None): """ Indents a given multiline string. By default, indentation is done using spaces rather than tab characters. To use tab characters, specify the tab character explictly, e.g.:: indent(text, tab='\t') Note that in this case ``spacespertab`` is ignored. Examples -------- >>> multiline = '''def f(x): ... return x*x''' >>> print(multiline) def f(x): return x*x >>> print(indent(multiline)) def f(x): return x*x >>> print(indent(multiline, numtabs=2)) def f(x): return x*x >>> print(indent(multiline, spacespertab=2)) def f(x): return x*x >>> print(indent(multiline, tab='####')) ####def f(x): #### return x*x """ if tab is None: tab = " " * spacespertab indent = tab * numtabs indentedstring = indent + text.replace("\n", f"\n{indent}") return indentedstring
[docs] def deindent(text, numtabs=None, spacespertab=4, docstring=False): """ Returns a copy of the string with the common indentation removed. Note that all tab characters are replaced with ``spacespertab`` spaces. If the ``docstring`` flag is set, the first line is treated differently and is assumed to be already correctly tabulated. If the ``numtabs`` option is given, the amount of indentation to remove is given explicitly and not the common indentation. Examples -------- Normal strings, e.g. function definitions: >>> multiline = ''' def f(x): ... return x**2''' >>> print(multiline) def f(x): return x**2 >>> print(deindent(multiline)) def f(x): return x**2 >>> print(deindent(multiline, docstring=True)) def f(x): return x**2 >>> print(deindent(multiline, numtabs=1, spacespertab=2)) def f(x): return x**2 Docstrings: >>> docstring = '''First docstring line. ... This line determines the indentation.''' >>> print(docstring) First docstring line. This line determines the indentation. >>> print(deindent(docstring, docstring=True)) First docstring line. This line determines the indentation. """ text = text.replace("\t", " " * spacespertab) lines = text.split("\n") # if it's a docstring, we search for the common tabulation starting from # line 1, otherwise we use all lines if docstring: start = 1 else: start = 0 if docstring and len(lines) < 2: # nothing to do return text # Find the minimum indentation level if numtabs is not None: indentlevel = numtabs * spacespertab else: lineseq = [ len(line) - len(line.lstrip()) for line in lines[start:] if len(line.strip()) ] if len(lineseq) == 0: indentlevel = 0 else: indentlevel = min(lineseq) # remove the common indentation lines[start:] = [line[indentlevel:] for line in lines[start:]] return "\n".join(lines)
[docs] def word_substitute(expr, substitutions): """ Applies a dict of word substitutions. The dict ``substitutions`` consists of pairs ``(word, rep)`` where each word ``word`` appearing in ``expr`` is replaced by ``rep``. Here a 'word' means anything matching the regexp ``\\bword\\b``. Examples -------- >>> expr = 'a*_b+c5+8+f(A)' >>> print(word_substitute(expr, {'a':'banana', 'f':'func'})) banana*_b+c5+8+func(A) """ for var, replace_var in substitutions.items(): expr = re.sub(f"\\b{var}\\b", str(replace_var), expr) return expr
[docs] def replace(s, substitutions): """ Applies a dictionary of substitutions. Simpler than `word_substitute`, it does not attempt to only replace words """ for before, after in substitutions.items(): s = s.replace(before, after) return s
KEYWORDS = {"and", "or", "not", "True", "False"}
[docs] def get_identifiers(expr, include_numbers=False): """ Return all the identifiers in a given string ``expr``, that is everything that matches a programming language variable like expression, which is here implemented as the regexp ``\\b[A-Za-z_][A-Za-z0-9_]*\\b``. Parameters ---------- expr : str The string to analyze include_numbers : bool, optional Whether to include number literals in the output. Defaults to ``False``. Returns ------- identifiers : set A set of all the identifiers (and, optionally, numbers) in `expr`. Examples -------- >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2)*17' >>> ids = get_identifiers(expr) >>> print(sorted(list(ids))) ['A', '_b', 'a', 'c5', 'f', 'tau_2'] >>> ids = get_identifiers(expr, include_numbers=True) >>> print(sorted(list(ids))) ['.3e-10', '17', '3', '8', 'A', '_b', 'a', 'c5', 'f', 'tau_2'] """ identifiers = set(re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", expr)) if include_numbers: # only the number, not a + or - numbers = set( re.findall( r"(?<=[^A-Za-z_])[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|^[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?", expr, ) ) else: numbers = set() return (identifiers - KEYWORDS) | numbers
[docs] def strip_empty_lines(s): """ Removes all empty lines from the multi-line string `s`. Examples -------- >>> multiline = '''A string with ... ... an empty line.''' >>> print(strip_empty_lines(multiline)) A string with an empty line. """ return "\n".join(line for line in s.split("\n") if line.strip())
[docs] def strip_empty_leading_and_trailing_lines(s): """ Removes all empty leading and trailing lines in the multi-line string `s`. """ lines = s.split("\n") while lines and not lines[0].strip(): del lines[0] while lines and not lines[-1].strip(): del lines[-1] return "\n".join(lines)
[docs] def stripped_deindented_lines(code): """ Returns a list of the lines in a multi-line string, deindented. """ code = deindent(code) code = strip_empty_lines(code) lines = code.split("\n") return lines
[docs] def code_representation(code): """ Returns a string representation for several different formats of code Formats covered include: - A single string - A list of statements/strings - A dict of strings - A dict of lists of statements/strings """ if not isinstance(code, (str, list, tuple, dict)): code = str(code) if isinstance(code, str): return strip_empty_leading_and_trailing_lines(code) if not isinstance(code, dict): code = {None: code} else: code = code.copy() for k, v in code.items(): if isinstance(v, (list, tuple)): v = "\n".join([str(line) for line in v]) code[k] = v if len(code) == 1 and list(code.keys())[0] is None: return strip_empty_leading_and_trailing_lines(list(code.values())[0]) output = [] for k, v in code.items(): msg = f"Key {k}:\n" msg += indent(str(v)) output.append(msg) return strip_empty_leading_and_trailing_lines("\n".join(output))
# The below is adapted from Peter Norvig's spelling corrector # http://norvig.com/spell.py (MIT licensed)
[docs] class SpellChecker: """ A simple spell checker that will be used to suggest the correct name if the user made a typo (e.g. for state variable names). Parameters ---------- words : iterable of str The known words alphabet : iterable of str, optional The allowed characters. Defaults to the characters allowed for identifiers, i.e. ascii characters, digits and the underscore. """ def __init__(self, words, alphabet=f"{string.ascii_lowercase + string.digits}_"): self.words = words self.alphabet = alphabet
[docs] def edits1(self, word): s = [(word[:i], word[i:]) for i in range(len(word) + 1)] deletes = [a + b[1:] for a, b in s if b] transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b) > 1] replaces = [a + c + b[1:] for a, b in s for c in self.alphabet if b] inserts = [a + c + b for a, b in s for c in self.alphabet] return set(deletes + transposes + replaces + inserts)
[docs] def known_edits2(self, word): return { e2 for e1 in self.edits1(word) for e2 in self.edits1(e1) if e2 in self.words }
[docs] def known(self, words): return {w for w in words if w in self.words}
[docs] def suggest(self, word): return self.known(self.edits1(word)) or self.known_edits2(word) or set()