Source code for brian2.utils.stringtools

"""
A collection of tools for string formatting tasks.
"""

import re
import string

# Public API of this module: the names exported by ``from ... import *``.
__all__ = ['indent',
           'deindent',
           'word_substitute',
           'replace',
           'get_identifiers',
           'strip_empty_lines',
           'stripped_deindented_lines',
           'strip_empty_leading_and_trailing_lines',
           'code_representation',
           'SpellChecker'
           ]

def indent(text, numtabs=1, spacespertab=4, tab=None):
    r'''
    Prefix every line of a (multi-line) string with an indentation.

    The indentation unit is ``spacespertab`` spaces unless ``tab`` is given
    explicitly, in which case ``tab`` is used as the unit and
    ``spacespertab`` is ignored. The unit is repeated ``numtabs`` times.

    Parameters
    ----------
    text : str
        The string to indent. Lines are separated by ``'\n'``.
    numtabs : int, optional
        How many indentation units to prepend to each line. Defaults to 1.
    spacespertab : int, optional
        Number of spaces making up one indentation unit. Defaults to 4.
    tab : str, optional
        An explicit indentation unit, e.g. ``'\t'``.

    Returns
    -------
    indented : str
        The text with every line (including empty ones) prefixed.

    Examples
    --------
    >>> indent('def f(x):\n    return x*x')
    '    def f(x):\n        return x*x'
    >>> indent('a\nb', numtabs=2, spacespertab=2)
    '    a\n    b'
    >>> indent('a\nb', tab='####')
    '####a\n####b'
    '''
    unit = ' ' * spacespertab if tab is None else tab
    prefix = unit * numtabs
    # Prefixing each split line is the same as prefixing the whole text and
    # re-inserting the prefix after every newline.
    return '\n'.join(prefix + line for line in text.split('\n'))
def deindent(text, numtabs=None, spacespertab=4, docstring=False):
    r'''
    Return a copy of the string with the common indentation removed.

    All tab characters are first replaced by ``spacespertab`` spaces.

    Parameters
    ----------
    text : str
        The (multi-line) string to process.
    numtabs : int, optional
        If given, remove ``numtabs * spacespertab`` columns of indentation
        instead of the common indentation. Only leading whitespace is ever
        removed: a line that is indented less than the requested amount
        just loses all of its indentation.
    spacespertab : int, optional
        Width of a tab in spaces. Defaults to 4.
    docstring : bool, optional
        If ``True``, the first line is left untouched and is not taken into
        account when determining the common indentation.

    Returns
    -------
    deindented : str
        The text with tabs expanded and the indentation removed.

    Examples
    --------
    >>> deindent('    def f(x):\n        return x**2')
    'def f(x):\n    return x**2'
    >>> deindent('    def f(x):\n        return x**2', numtabs=1, spacespertab=2)
    '  def f(x):\n      return x**2'
    >>> deindent('First line.\n    Second line.', docstring=True)
    'First line.\nSecond line.'
    '''
    text = text.replace('\t', ' ' * spacespertab)
    lines = text.split('\n')
    # For docstrings the first line is assumed to be correctly placed already
    start = 1 if docstring else 0
    if docstring and len(lines) < 2:
        # Only a first line: nothing to deindent
        return text

    def leading_whitespace(line):
        return len(line) - len(line.lstrip())

    if numtabs is not None:
        # A negative request is treated as "remove nothing"
        indentlevel = max(0, numtabs * spacespertab)
    else:
        # Common indentation of all non-blank lines
        widths = [leading_whitespace(line) for line in lines[start:]
                  if line.strip()]
        indentlevel = min(widths) if widths else 0

    # Never cut into the actual content of a line that is indented less than
    # ``indentlevel`` (possible with an explicit ``numtabs``)
    lines[start:] = [line[min(indentlevel, leading_whitespace(line)):]
                     for line in lines[start:]]
    return '\n'.join(lines)
def word_substitute(expr, substitutions):
    r'''
    Apply a dict of word substitutions to a string.

    Every occurrence of a key of ``substitutions`` that appears as a whole
    word in ``expr`` (i.e. that matches the regexp ``\bword\b``) is replaced
    by ``str()`` of the corresponding value. The replacement text is
    inserted literally; backslashes in it have no special meaning.

    The substitutions are applied one after the other in the iteration order
    of the dict, so the output of an earlier substitution can be changed by
    a later one.

    Parameters
    ----------
    expr : str
        The string in which to substitute.
    substitutions : dict
        Mapping from word to replacement.

    Returns
    -------
    substituted : str
        The string after all substitutions.

    Examples
    --------
    >>> expr = 'a*_b+c5+8+f(A)'
    >>> print(word_substitute(expr, {'a':'banana', 'f':'func'}))
    banana*_b+c5+8+func(A)
    '''
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is Python 2 only)
    for word, replacement in substitutions.items():
        literal = str(replacement)
        # A callable replacement prevents ``re.sub`` from interpreting
        # backslash sequences (e.g. ``\1`` or ``\d``) in the replacement text
        expr = re.sub(r'\b' + word + r'\b',
                      lambda match, literal=literal: literal,
                      expr)
    return expr
def replace(s, substitutions):
    '''
    Apply a dictionary of plain substring substitutions.

    Simpler than `word_substitute`: every occurrence of a key is replaced,
    whether or not it is a whole word. The substitutions are applied one
    after the other in the iteration order of the dict.

    Parameters
    ----------
    s : str
        The string in which to substitute.
    substitutions : dict
        Mapping from the substring to look for to its replacement.

    Returns
    -------
    replaced : str
        The string after all substitutions.

    Examples
    --------
    >>> print(replace('abc', {'b': 'X'}))
    aXc
    '''
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is Python 2 only)
    for before, after in substitutions.items():
        s = s.replace(before, after)
    return s
# Words that match the identifier pattern but are never reported as
# identifiers by `get_identifiers`.
KEYWORDS = {'and', 'or', 'not', 'True', 'False'}


def get_identifiers(expr, include_numbers=False):
    r'''
    Return all the identifiers in a given string ``expr``, that is everything
    that matches a programming language variable like expression, which is
    here implemented as the regexp ``\b[A-Za-z_][A-Za-z0-9_]*\b``. The words
    in ``KEYWORDS`` are excluded from the result.

    Parameters
    ----------
    expr : str
        The string to analyze
    include_numbers : bool, optional
        Whether to include number literals in the output. Defaults to
        ``False``.

    Returns
    -------
    identifiers : set
        A set of all the identifiers (and, optionally, numbers) in `expr`.

    Examples
    --------
    >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2)*17'
    >>> ids = get_identifiers(expr)
    >>> print(sorted(list(ids)))
    ['A', '_b', 'a', 'c5', 'f', 'tau_2']
    >>> ids = get_identifiers(expr, include_numbers=True)
    >>> print(sorted(list(ids)))
    ['.3e-10', '17', '3', '8', 'A', '_b', 'a', 'c5', 'f', 'tau_2']
    >>> print(sorted(get_identifiers('x10 + 2', include_numbers=True)))
    ['2', 'x10']
    '''
    identifiers = set(re.findall(r'\b[A-Za-z_][A-Za-z0-9_]*\b', expr))
    if include_numbers:
        # Only the number itself, not a leading + or -. The negative
        # lookbehind rejects digits that directly follow a letter, an
        # underscore or another digit, so digits that are part of an
        # identifier such as ``x10`` are not reported as numbers; it is also
        # satisfied at the very start of the string.
        numbers = set(re.findall(
            r'(?<![A-Za-z0-9_])[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?', expr))
    else:
        numbers = set()
    return (identifiers - KEYWORDS) | numbers
def strip_empty_lines(s):
    r'''
    Drop every empty line from the multi-line string `s`.

    A line counts as empty if it contains nothing but whitespace.

    Parameters
    ----------
    s : str
        The string to process. Lines are separated by ``'\n'``.

    Returns
    -------
    stripped : str
        The remaining lines, joined by ``'\n'``.

    Examples
    --------
    >>> strip_empty_lines('A string with\n\nan empty line.')
    'A string with\nan empty line.'
    '''
    kept = []
    for candidate in s.split('\n'):
        if candidate.strip():
            kept.append(candidate)
    return '\n'.join(kept)
def strip_empty_leading_and_trailing_lines(s):
    r'''
    Drop the empty lines at the start and at the end of the multi-line
    string `s`.

    A line counts as empty if it contains nothing but whitespace. Empty
    lines between non-empty lines are kept.

    Parameters
    ----------
    s : str
        The string to process. Lines are separated by ``'\n'``.

    Returns
    -------
    stripped : str
        The lines from the first to the last non-empty one, joined by
        ``'\n'`` (an empty string if there is no non-empty line).

    Examples
    --------
    >>> strip_empty_leading_and_trailing_lines('\n\na\n\nb\n  \n')
    'a\n\nb'
    '''
    all_lines = s.split('\n')
    first = 0
    stop = len(all_lines)
    # Move the window borders inwards instead of deleting list elements
    while first < stop and not all_lines[first].strip():
        first += 1
    while stop > first and not all_lines[stop - 1].strip():
        stop -= 1
    return '\n'.join(all_lines[first:stop])
def stripped_deindented_lines(code):
    '''
    Split a multi-line string into a list of lines, after passing it through
    `deindent` and then through `strip_empty_lines`.

    Parameters
    ----------
    code : str
        The multi-line string to process.

    Returns
    -------
    lines : list of str
        The individual lines of the processed string.
    '''
    return strip_empty_lines(deindent(code)).split('\n')
def code_representation(code):
    '''
    Return a string representation for several different formats of code.

    Formats covered include:

    - A single string
    - A list of statements/strings
    - A dict of strings
    - A dict of lists of statements/strings

    Any other object is converted with ``str()`` first.

    Parameters
    ----------
    code : str, list, tuple, dict or object
        The code to represent.

    Returns
    -------
    representation : str
        For a string or a list/tuple: the (joined) code without empty
        leading and trailing lines. For a dict: one ``Key <key>:`` header
        per entry, each followed by the indented code of that entry.
    '''
    # ``basestring`` only exists on Python 2 (where it covers ``str`` and
    # ``unicode``); on Python 3 ``str`` is the only string type.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if not isinstance(code, (string_types, list, tuple, dict)):
        code = str(code)
    if isinstance(code, string_types):
        return strip_empty_leading_and_trailing_lines(code)

    if not isinstance(code, dict):
        # Treat a list of statements like a dict with a single unnamed entry
        code = {None: code}

    # Build a new dict so that the caller's dict is never modified
    blocks = {}
    for key, value in code.items():
        if isinstance(value, (list, tuple)):
            value = '\n'.join([str(line) for line in value])
        blocks[key] = value

    if len(blocks) == 1 and None in blocks:
        return strip_empty_leading_and_trailing_lines(blocks[None])

    output = []
    for key, value in blocks.items():
        # Wrap the key in a 1-tuple so that tuple keys are formatted as a
        # whole instead of being unpacked by the % operator
        msg = 'Key %s:\n' % (key,)
        msg += indent(str(value))
        output.append(msg)
    return strip_empty_leading_and_trailing_lines('\n'.join(output))
# The below is adapted from Peter Norvig's spelling corrector # http://norvig.com/spell.py (MIT licensed)
[docs]class SpellChecker(object): ''' A simple spell checker that will be used to suggest the correct name if the user made a typo (e.g. for state variable names). Parameters ---------- words : iterable of str The known words alphabet : iterable of str, optional The allowed characters. Defaults to the characters allowed for identifiers, i.e. ascii characters, digits and the underscore. ''' def __init__(self, words, alphabet=string.ascii_lowercase+string.digits+'_'): self.words = words self.alphabet = alphabet
[docs] def edits1(self, word): s = [(word[:i], word[i:]) for i in range(len(word) + 1)] deletes = [a + b[1:] for a, b in s if b] transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b)>1] replaces = [a + c + b[1:] for a, b in s for c in self.alphabet if b] inserts = [a + c + b for a, b in s for c in self.alphabet] return set(deletes + transposes + replaces + inserts)
[docs] def known_edits2(self, word): return set(e2 for e1 in self.edits1(word) for e2 in self.edits1(e1) if e2 in self.words)
[docs] def known(self, words): return set(w for w in words if w in self.words)
[docs] def suggest(self, word): return self.known(self.edits1(word)) or self.known_edits2(word) or set()