"""
A collection of tools for string formatting tasks.
"""
import re
import string
# Public API of this module: the names exported by a star-import.
__all__ = ['indent',
'deindent',
'word_substitute',
'replace',
'get_identifiers',
'strip_empty_lines',
'stripped_deindented_lines',
'strip_empty_leading_and_trailing_lines',
'code_representation',
'SpellChecker'
]
def indent(text, numtabs=1, spacespertab=4, tab=None):
    '''
    Prefix every line of a (possibly multi-line) string with indentation.

    The indentation unit is ``spacespertab`` spaces unless an explicit ``tab``
    string is given, in which case ``spacespertab`` is ignored, e.g.::

        indent(text, tab='\\t')

    The unit is repeated ``numtabs`` times in front of each line.

    Parameters
    ----------
    text : str
        The string to indent. Lines are separated by ``'\\n'``.
    numtabs : int, optional
        How many indentation units to prepend to each line. Defaults to 1.
    spacespertab : int, optional
        Number of spaces making up one unit when ``tab`` is ``None``.
    tab : str, optional
        The string to use as a single indentation unit.

    Returns
    -------
    indented : str
        The indented string.

    Examples
    --------
    >>> multiline = """def f(x):
    ...     return x*x"""
    >>> print(multiline)
    def f(x):
        return x*x
    >>> print(indent(multiline))
        def f(x):
            return x*x
    >>> print(indent(multiline, numtabs=2))
            def f(x):
                return x*x
    >>> print(indent(multiline, spacespertab=2))
      def f(x):
          return x*x
    >>> print(indent(multiline, tab='####'))
    ####def f(x):
    ####    return x*x
    '''
    unit = ' ' * spacespertab if tab is None else tab
    prefix = unit * numtabs
    # Every line (including an empty one) receives the prefix.
    return '\n'.join(prefix + line for line in text.split('\n'))
def deindent(text, numtabs=None, spacespertab=4, docstring=False):
    '''
    Return a copy of ``text`` with its common leading indentation removed.

    Tab characters are first expanded to ``spacespertab`` spaces. Unless
    ``numtabs`` is given, the amount removed is the smallest indentation found
    among the non-blank lines that are considered.

    Parameters
    ----------
    text : str
        The string to process. Lines are separated by ``'\\n'``.
    numtabs : int, optional
        If given, exactly ``numtabs*spacespertab`` leading characters are cut
        from each considered line instead of the common indentation.
    spacespertab : int, optional
        Width of one tab, in spaces. Defaults to 4.
    docstring : bool, optional
        If ``True``, the first line is left untouched and is not used to
        determine the indentation (as for a docstring whose first line
        directly follows the opening quotes).

    Returns
    -------
    deindented : str
        The string with the indentation removed.

    Examples
    --------
    Normal strings, e.g. function definitions:

    >>> multiline = """    def f(x):
    ...         return x**2"""
    >>> print(multiline)
        def f(x):
            return x**2
    >>> print(deindent(multiline))
    def f(x):
        return x**2
    >>> print(deindent(multiline, docstring=True))
        def f(x):
    return x**2
    >>> print(deindent(multiline, numtabs=1, spacespertab=2))
      def f(x):
          return x**2

    Docstrings:

    >>> docstring = """First docstring line.
    ...     This line determines the indentation."""
    >>> print(docstring)
    First docstring line.
        This line determines the indentation.
    >>> print(deindent(docstring, docstring=True))
    First docstring line.
    This line determines the indentation.
    '''
    text = text.replace('\t', ' ' * spacespertab)
    all_lines = text.split('\n')

    # A docstring's first line is exempt, so a one-line docstring is done.
    if docstring and len(all_lines) < 2:
        return text
    first = 1 if docstring else 0
    untouched, considered = all_lines[:first], all_lines[first:]

    if numtabs is None:
        # Blank lines carry no information about the indentation level.
        widths = [len(entry) - len(entry.lstrip())
                  for entry in considered if entry.strip()]
        cut = min(widths) if widths else 0
    else:
        cut = numtabs * spacespertab

    return '\n'.join(untouched + [entry[cut:] for entry in considered])
def word_substitute(expr, substitutions):
    '''
    Applies a dict of word substitutions.

    The dict ``substitutions`` consists of pairs ``(word, rep)`` where each
    word ``word`` appearing in ``expr`` is replaced by ``rep``. Here a 'word'
    means anything matching the regexp ``\\bword\\b``.

    Parameters
    ----------
    expr : str
        The string in which to perform the substitutions.
    substitutions : dict
        Mapping from word to replacement. Replacements are converted with
        `str` before being inserted. The substitutions are applied one after
        the other, in the iteration order of the dict.

    Returns
    -------
    substituted : str
        The string after all substitutions have been applied.

    Examples
    --------
    >>> expr = 'a*_b+c5+8+f(A)'
    >>> print(word_substitute(expr, {'a':'banana', 'f':'func'}))
    banana*_b+c5+8+func(A)
    '''
    # ``items()`` (rather than the Python-2-only ``iteritems()``) keeps this
    # function usable on both Python 2 and Python 3.
    for var, replace_var in substitutions.items():
        expr = re.sub(r'\b' + var + r'\b', str(replace_var), expr)
    return expr
def replace(s, substitutions):
    '''
    Applies a dictionary of substitutions. Simpler than `word_substitute`, it
    does not attempt to only replace words.

    Parameters
    ----------
    s : str
        The string in which to perform the substitutions.
    substitutions : dict
        Mapping from the text to look for to the text replacing it. The
        substitutions are applied one after the other, in the iteration order
        of the dict.

    Returns
    -------
    replaced : str
        The string after all substitutions have been applied.
    '''
    # ``items()`` (rather than the Python-2-only ``iteritems()``) keeps this
    # function usable on both Python 2 and Python 3.
    for before, after in substitutions.items():
        s = s.replace(before, after)
    return s
# Words that look like identifiers but are never reported as such.
KEYWORDS = {'and', 'or', 'not', 'True', 'False'}


def get_identifiers(expr, include_numbers=False):
    '''
    Return all the identifiers in a given string ``expr``, that is everything
    that matches a programming language variable like expression, which is
    here implemented as the regexp ``\\b[A-Za-z_][A-Za-z0-9_]*\\b``.

    Parameters
    ----------
    expr : str
        The string to analyze
    include_numbers : bool, optional
        Whether to include number literals in the output. Defaults to ``False``.

    Returns
    -------
    identifiers : set
        A set of all the identifiers (and, optionally, numbers) in `expr`.
        The words listed in ``KEYWORDS`` are never included.

    Examples
    --------
    >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2)*17'
    >>> ids = get_identifiers(expr)
    >>> print(sorted(list(ids)))
    ['A', '_b', 'a', 'c5', 'f', 'tau_2']
    >>> ids = get_identifiers(expr, include_numbers=True)
    >>> print(sorted(list(ids)))
    ['.3e-10', '17', '3', '8', 'A', '_b', 'a', 'c5', 'f', 'tau_2']
    >>> print(sorted(get_identifiers('x12 + 3', include_numbers=True)))
    ['3', 'x12']
    '''
    identifiers = set(re.findall(r'\b[A-Za-z_][A-Za-z0-9_]*\b', expr))
    if include_numbers:
        # Only the number itself, not a leading + or -. The negative
        # lookbehind rejects a match that starts right after a letter, digit
        # or underscore, so that digits belonging to an identifier (e.g. the
        # "2" in "x12") are not reported as numbers; it also succeeds at the
        # very start of the string.
        numbers = set(re.findall(
            r'(?<![A-Za-z0-9_])[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?', expr))
    else:
        numbers = set()
    return (identifiers - KEYWORDS) | numbers
def strip_empty_lines(s):
    '''
    Drop every empty line from the multi-line string `s`.

    A line counts as empty if nothing remains after stripping whitespace.

    Examples
    --------
    >>> multiline = """A string with
    ...
    ... an empty line."""
    >>> print(strip_empty_lines(multiline))
    A string with
    an empty line.
    '''
    kept = []
    for candidate in s.split('\n'):
        if candidate.strip():
            kept.append(candidate)
    return '\n'.join(kept)
def strip_empty_leading_and_trailing_lines(s):
    '''
    Drop the empty lines at the start and at the end of the multi-line
    string `s`. Empty lines between non-empty lines are kept.

    A line counts as empty if nothing remains after stripping whitespace.
    '''
    rows = s.split('\n')
    begin, end = 0, len(rows)
    # Advance past the empty lines at the top ...
    while begin < end and not rows[begin].strip():
        begin += 1
    # ... and step back over the empty lines at the bottom.
    while end > begin and not rows[end - 1].strip():
        end -= 1
    return '\n'.join(rows[begin:end])
def stripped_deindented_lines(code):
    '''
    Split a multi-line string into a list of its lines, after removing the
    common indentation (see `deindent`) and all empty lines (see
    `strip_empty_lines`).
    '''
    return strip_empty_lines(deindent(code)).split('\n')
def code_representation(code):
    '''
    Returns a string representation for several different formats of code

    Formats covered include:

    - A single string
    - A list of statements/strings
    - A dict of strings
    - A dict of lists of statements/strings

    Any other object is converted with `str` first. Empty leading and
    trailing lines are removed from the result. For a dict, each entry is
    rendered as a ``Key <key>:`` line followed by the indented code, unless
    the dict consists of the single key ``None``, in which case only the code
    itself is returned.
    '''
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3: basestring no longer exists

    if not isinstance(code, (string_types, list, tuple, dict)):
        code = str(code)
    if isinstance(code, string_types):
        return strip_empty_leading_and_trailing_lines(code)
    if not isinstance(code, dict):
        # A bare list/tuple of statements is treated like a dict with the
        # single key None
        code = {None: code}
    else:
        # Work on a copy so that the caller's dict is not modified below
        code = code.copy()
    # Join lists of statements into single strings. Iterating over a snapshot
    # of the items makes it safe to assign into the dict in the loop.
    for k, v in list(code.items()):
        if isinstance(v, (list, tuple)):
            code[k] = '\n'.join([str(line) for line in v])
    # ``next(iter(...))`` replaces the Python-2-only indexing of the lists
    # returned by ``keys()``/``values()``
    if len(code) == 1 and next(iter(code)) is None:
        return strip_empty_leading_and_trailing_lines(next(iter(code.values())))
    output = []
    for k, v in code.items():
        msg = 'Key %s:\n' % k
        msg += indent(str(v))
        output.append(msg)
    return strip_empty_leading_and_trailing_lines('\n'.join(output))
# The below is adapted from Peter Norvig's spelling corrector
# http://norvig.com/spell.py (MIT licensed)
class SpellChecker(object):
    '''
    A simple spell checker that will be used to suggest the correct name if the
    user made a typo (e.g. for state variable names).

    Parameters
    ----------
    words : iterable of str
        The known words
    alphabet : iterable of str, optional
        The characters used when generating insertions and replacements.
        Defaults to lowercase ascii letters, digits and the underscore.
    '''

    def __init__(self, words,
                 alphabet=string.ascii_lowercase+string.digits+'_'):
        self.words = words
        self.alphabet = alphabet

    def edits1(self, word):
        '''
        Return the set of all strings that are one edit away from ``word``
        (deletion, transposition of neighbours, replacement or insertion of
        a character from the alphabet).
        '''
        candidates = set()
        for cut in range(len(word) + 1):
            head, tail = word[:cut], word[cut:]
            for char in self.alphabet:
                # insertion at the cut position
                candidates.add(head + char + tail)
                if tail:
                    # replacement of the character following the cut
                    candidates.add(head + char + tail[1:])
            if tail:
                # deletion of the character following the cut
                candidates.add(head + tail[1:])
            if len(tail) > 1:
                # swap of the two characters following the cut
                candidates.add(head + tail[1] + tail[0] + tail[2:])
        return candidates

    def known_edits2(self, word):
        '''
        Return the set of known words that are two edits away from ``word``.
        '''
        found = set()
        for once_edited in self.edits1(word):
            for twice_edited in self.edits1(once_edited):
                if twice_edited in self.words:
                    found.add(twice_edited)
        return found

    def known(self, words):
        '''
        Return the subset of ``words`` that are known words.
        '''
        return {candidate for candidate in words if candidate in self.words}

    def suggest(self, word):
        '''
        Return a set of suggestions for ``word``: the known words one edit
        away if there are any, otherwise the known words two edits away
        (possibly an empty set).
        '''
        close_matches = self.known(self.edits1(word))
        if close_matches:
            return close_matches
        return self.known_edits2(word) or set()