Skip to content

Instantly share code, notes, and snippets.

@khelwood
Last active August 27, 2023 11:34
Show Gist options
  • Select an option

  • Save khelwood/312d8066f03401a741ee128847ec9927 to your computer and use it in GitHub Desktop.

Select an option

Save khelwood/312d8066f03401a741ee128847ec9927 to your computer and use it in GitHub Desktop.
tool for editing text as a sequence of lines
"""
Tool for editing a large block of text split into lines.
This can make small changes much faster.
Also provides lots of methods for searching and mutating
the text.
"""
import re as _re
from typing import NamedTuple, Match
from functools import partial
class Pos(NamedTuple):
    """An immutable (line, column) position within a text."""
    line: int
    column: int

    @classmethod
    def of(cls, p):
        """
        Coerces p into a position.
        An existing instance is returned unchanged, an int becomes
        (p, 0), and anything else is unpacked as the fields.
        """
        if isinstance(p, cls):
            result = p
        elif isinstance(p, int):
            result = cls(p, 0)
        else:
            result = cls(*p)
        return result

    def __bool__(self):
        """A position is truthy when its line is zero or greater."""
        return self.line >= 0

    def right(self, dx):
        """Returns this position moved dx columns along the same line."""
        return Pos(self.line, self.column + dx)

    def __add__(self, p):
        """Field-wise sum of this position and the pair p."""
        d_line, d_col = p
        return Pos(self.line + d_line, self.column + d_col)

    def __radd__(self, p):
        """Field-wise sum of the pair p and this position."""
        o_line, o_col = p
        return Pos(o_line + self.line, o_col + self.column)

    def __sub__(self, p):
        """Field-wise difference: this position minus the pair p."""
        d_line, d_col = p
        return Pos(self.line - d_line, self.column - d_col)

    def __rsub__(self, p):
        """Field-wise difference: the pair p minus this position."""
        o_line, o_col = p
        return Pos(o_line - self.line, o_col - self.column)


# Short alternative names for the two fields
Pos.l = Pos.line
Pos.li = Pos.line
Pos.i = Pos.column
Pos.col = Pos.column
class LineMatch(NamedTuple):
    """Pairs a line number with a regex match object (or None)."""
    line: int
    match: Match

    def __bool__(self):
        """A result is truthy when its line is zero or greater."""
        return self.line >= 0


# Short alternative names for the two fields
LineMatch.l = LineMatch.line
LineMatch.li = LineMatch.line
LineMatch.m = LineMatch.match
# Returned by the string searches when nothing is found:
# line and column are both -1, so the position is falsy.
NOT_FOUND = Pos(line=-1, column=-1)
# The very first position in a text: line 0, column 0.
START = Pos(line=0, column=0)
# Returned by the regex searches when nothing is found:
# line is -1 (so the result is falsy) and there is no match object.
NO_MATCH = LineMatch(line=-1, match=None)
def wrap(i, n):
    """
    Normalises the index i against the length n.
    A negative i counts back from n (as with sequence indexes);
    the result is then clamped so that 0 <= result <= n.
    """
    if i < 0:
        shifted = i + n
        return shifted if shifted > 0 else 0
    return n if i > n else i
def count_ws(lines: list):
    """
    Returns the number of leading whitespace characters
    that are the same at the start of every one of
    the given lines.

    The first line that contains a non-whitespace character is used
    as the reference; empty and whitespace-only lines before it are
    skipped. A later line only narrows the result if it is longer
    than the prefix it shares with the reference, so empty lines
    never reduce the count.
    Returns 0 if there is no line with visible content.
    """
    it = iter(lines)
    # An empty string is not "isspace", so test for emptiness as well;
    # otherwise a leading blank line would force the result to 0.
    ref = next((x for x in it if x and not x.isspace()), None)
    if ref is None:
        return 0
    w = 0
    while w < len(ref) and ref[w].isspace():
        w += 1
    for line in it:
        if w == 0:
            break
        limit = min(w, len(line))
        cw = 0
        while cw < limit and line[cw] == ref[cw]:
            cw += 1
        # Only lines with content beyond the shared prefix narrow it.
        if cw < len(line):
            w = cw
    return w
def modify_chunk(line: str, a: int, e: int, f):
    """
    Returns a copy of line with the region [a, e) rewritten.
    line: the string to work on
    a: index where the region starts
    e: index where the region ends (exclusive)
    f: either the replacement string itself, or a function that is
       given the current region text and returns its replacement
    """
    if isinstance(f, str):
        replacement = f
    else:
        replacement = f(line[a:e])
    head, tail = line[:a], line[e:]
    return head + replacement + tail
class Editor:
    """
    A mutable sequence of lines comprising a block of text.

    Positions are given either as an int (a line index, column 0) or
    as a two-item (line, column) sequence such as a Pos. Ranges are
    half-open: the start is included and the end is excluded.
    """

    def __init__(self, lines):
        """
        Creates an editor from a list of lines (used as-is, not
        copied), a string (split on newlines), or any other iterable
        of lines (copied into a new list).
        """
        if isinstance(lines, list):
            self.lines = lines
        elif isinstance(lines, str):
            self.lines = lines.split('\n')
        else:
            self.lines = list(lines)

    def __len__(self):
        """The number of lines."""
        return len(self.lines)

    def __getitem__(self, p):
        """
        Gets the specified line(s) if p is an int or slice.
        Gets the specified character if p has two coordinates.
        """
        if isinstance(p, (int, slice)):
            return self.lines[p]
        pl, pi = self.check_pos(p)
        return self.lines[pl][pi]

    def __setitem__(self, p, value):
        """
        Sets the specified line(s) if p is an int or slice.
        Sets the specified character if p has two coordinates.
        """
        if isinstance(p, (int, slice)):
            self.lines[p] = value
        else:
            pl, pi = self.check_pos(p)
            self.modify_chunk(pl, pi, pi + 1, value)

    def check_pos(self, p):
        """
        Raises an IndexError if p is not the valid position
        of a character in this editor. If it's valid,
        it is returned as a Pos. If p is an int it is expanded
        to (p,0). Negative indexes count back from the end.
        """
        lines = self.lines
        n = len(lines)
        if isinstance(p, int):
            if 0 <= p < n:
                return Pos(p, 0)
            if -n <= p < 0:
                return Pos(p + n, 0)
            raise IndexError(f'invalid line index {p}')
        li, i = p
        if li < 0:
            li += n
        if not (0 <= li < n):
            raise IndexError(f'invalid line index in {p}')
        ll = len(lines[li])
        if 0 <= i < ll:
            return Pos(li, i)
        if -ll <= i < 0:
            return Pos(li, i + ll)
        raise IndexError(f'invalid column index in {p}')

    @property
    def end(self):
        """
        Gets the position of the end of the text
        (i.e. line=len(self), col=0)
        """
        return Pos(len(self.lines), 0)

    def __iter__(self):
        """Iterates the lines in the text."""
        return iter(self.lines)

    def modify_chunk(self, li, a, e, f):
        """
        Modifies the region [a, e) of line li in place.
        f is the replacement string, or a function applied to the
        current region text.
        """
        lines = self.lines
        lines[li] = modify_chunk(lines[li], a, e, f)

    def wrap(self, p):
        """
        Wraps p to a pos in this text.
        An int is expanded to (pos,0).
        A line before the start gives START; a line at or past the
        end gives the end position; otherwise the column is wrapped
        against the length of its line.
        See wrap(i, n) for details of wrapping.
        """
        lines = self.lines
        nl = len(lines)
        if isinstance(p, int):
            return Pos(wrap(p, nl), 0)
        li, i = p
        if li < 0:
            li += nl
            if li < 0:
                return START
        if li >= nl:
            return Pos(nl, 0)
        return Pos(li, wrap(i, len(lines[li])))

    def wrap_start(self, p):
        """If p is None, returns START; else wraps p."""
        return START if p is None else self.wrap(p)

    def wrap_end(self, p):
        """If p is None, returns self.end; else wraps p."""
        return self.end if p is None else self.wrap(p)

    def wrap_range(self, start, end):
        """Wraps the given start and end positions."""
        return self.wrap_start(start), self.wrap_end(end)

    def join(self):
        """Gets the lines of this text, joined with newlines."""
        return '\n'.join(self.lines)

    def insert(self, p, text):
        """
        Inserts text at the given position.
        If p is an int, inserts a new line.
        If p has two coordinates, inserts the text at a
        point inside a line; the column may equal the length of
        the line, which inserts at the end of it. The position
        (len(self), 0) appends the text as a new line.
        Raises an IndexError for any other out-of-range position.
        """
        lines = self.lines
        nl = len(lines)
        if isinstance(p, int):
            return lines.insert(p, text)
        pl, pi = p
        if pl == nl and pi == 0:
            lines.append(text)
            return
        if pl < 0:
            pl += nl
        if not (0 <= pl < nl):
            raise IndexError(f'invalid line index in {p}')
        ll = len(lines[pl])
        if pi < 0:
            pi += ll
        # Unlike check_pos, pi == ll is allowed: an insertion point
        # may sit just after the last character of the line.
        if not (0 <= pi <= ll):
            raise IndexError(f'invalid column index in {p}')
        self.modify_chunk(pl, pi, pi, text)

    def delete(self, p, q):
        """
        Deletes text between p (incl) and q (excl).
        If p and q are ints, deletes the specified lines;
        otherwise deletes the text between the given
        positions, in one line or across multiple lines.
        Does nothing if the range is empty.
        """
        lines = self.lines
        nl = len(lines)
        if isinstance(p, int) and isinstance(q, int):
            del lines[wrap(p, nl):wrap(q, nl)]
            return
        p, q = self.wrap_range(p, q)
        if p >= q:
            return
        if p.line == q.line:
            self.modify_chunk(p.line, p.column, q.column, '')
        elif q.line >= nl:
            # Range runs to the end of the text.
            lines[p.line] = lines[p.line][:p.column]
            del lines[p.line + 1:]
        else:
            # Join what is left of the first and last lines.
            lines[p.line] = (lines[p.line][:p.column]
                             + lines[q.line][q.column:])
            del lines[p.line + 1:q.line + 1]

    def replace(self, p, q, text):
        """
        Replaces the specified range with the given text.
        An empty range (p == q after wrapping) inserts the text.
        Raises an IndexError if p is after q.
        """
        p, q = self.wrap_range(p, q)
        if p >= q:
            if p == q:
                return self.insert(p, text)
            raise IndexError('invalid range')
        lines = self.lines
        nl = len(lines)
        pl, a = p
        if pl == q.line:
            self.modify_chunk(pl, a, q.column, text)
        elif q.line >= nl:
            # Range runs to the end of the text: keep the rewritten
            # first line and drop everything after it.
            lines[pl] = lines[pl][:a] + text
            del lines[pl + 1:]
        else:
            # Keep the head of the first line and the tail of the
            # last line, with the new text between them.
            lines[pl] = lines[pl][:a] + text + lines[q.line][q.column:]
            del lines[pl + 1:q.line + 1]

    def __iadd__(self, value):
        """
        Adds the given lines to the end of this text.
        value may be another Editor, a string (split on newlines)
        or an iterable of lines.
        """
        if isinstance(value, Editor):
            value = value.lines
        elif isinstance(value, str):
            value = value.split('\n')
        self.lines.extend(value)
        return self

    def __add__(self, other):
        """
        Returns a new editor with this editor's lines
        followed by the given lines.
        """
        r = type(self)(list(self.lines))
        r += other
        return r

    def append(self, line: str):
        """Appends a line to this editor's lines."""
        self.lines.append(line)

    def resplit(self):
        """
        Following edits, splits any lines containing a
        newline character into individual lines.
        Returns true if any line was split.
        """
        lines = self.lines
        i = 0
        changed = False
        while i < len(lines):
            line = lines[i]
            if '\n' in line:
                changed = True
                para = line.split('\n')
                lines[i:i + 1] = para
                i += len(para)
            else:
                i += 1
        return changed

    def find(self, text, pos=None, end=None,
             *, after=False, nf=NOT_FOUND):
        """
        Finds the position of the first occurrence of the
        given string between the given (optional) positions,
        or returns nf (NOT_FOUND by default).
        If after is true, the position just past the occurrence
        is returned instead of its start.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        pl, pi = pos
        ql, qi = end
        lines = self.lines
        adj = len(text) if after else 0
        if pl == ql:
            i = lines[pl].find(text, pi, qi)
            return Pos(pl, i + adj) if i >= 0 else nf
        i = lines[pl].find(text, pi)
        if i >= 0:
            return Pos(pl, i + adj)
        for li in range(pl + 1, ql):
            i = lines[li].find(text)
            if i >= 0:
                return Pos(li, i + adj)
        if ql < len(lines) and qi > 0:
            i = lines[ql].find(text, 0, qi)
            if i >= 0:
                return Pos(ql, i + adj)
        return nf

    def index(self, text, pos=None, end=None,
              *, after=False):
        """
        Returns the position of the first occurrence of the
        given string between the given (optional) positions.
        Raises a ValueError if not found.
        """
        p = self.find(text, pos, end, after=after)
        if not p:
            raise ValueError('string not found')
        return p

    def rfind(self, text, pos=None, end=None,
              *, after=False, nf=NOT_FOUND):
        """
        Returns the position of the last occurrence of the
        given string between the given (optional) positions,
        or returns nf (NOT_FOUND by default).
        If after is true, the position just past the occurrence
        is returned instead of its start.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        lines = self.lines
        pl, pi = pos
        ql, qi = end
        adj = len(text) if after else 0
        if pl == ql:
            i = lines[pl].rfind(text, pi, qi)
            return Pos(pl, i + adj) if i >= 0 else nf
        if ql < len(lines) and qi > 0:
            i = lines[ql].rfind(text, 0, qi)
            if i >= 0:
                return Pos(ql, i + adj)
        for li in range(ql - 1, pl, -1):
            i = lines[li].rfind(text)
            if i >= 0:
                return Pos(li, i + adj)
        i = lines[pl].rfind(text, pi)
        if i >= 0:
            return Pos(pl, i + adj)
        return nf

    def rindex(self, text, pos=None, end=None,
               *, after=False):
        """
        Returns the position of the last occurrence of the
        given string between the given (optional) positions.
        Raises a ValueError if not found.
        """
        p = self.rfind(text, pos, end, after=after)
        if not p:
            raise ValueError('string not found')
        return p

    def find_iter(self, text, pos=None, end=None,
                  *, after=False):
        """
        Generates all the positions of non-overlapping occurrences
        of the given substring between the two
        (optional) positions.
        If after is true, the positions just past each occurrence
        are generated instead.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        lines = self.lines
        nl = len(lines)
        lt = len(text)
        adj = lt if after else 0
        # Always advance by at least one column, so that an empty
        # search string cannot match the same spot forever.
        step = max(lt, 1)

        def poses(li, a=0, e=None):
            line = lines[li]
            if e is None:
                e = len(line)
            i = line.find(text, a, e)
            while i >= 0:
                yield Pos(li, i + adj)
                i = line.find(text, i + step, e)

        pl, pi = pos
        ql, qi = end
        if pl == ql:
            yield from poses(pl, pi, qi)
            return
        yield from poses(pl, pi)
        for li in range(pl + 1, ql):
            yield from poses(li)
        if ql < nl and qi > 0:
            yield from poses(ql, 0, qi)

    def rfind_iter(self, text, pos=None, end=None,
                   *, after=False):
        """
        Generates all the positions of non-overlapping occurrences
        of the given substring between the two
        (optional) positions, starting from the end.
        If after is true, the positions just past each occurrence
        are generated instead.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        lines = self.lines
        nl = len(lines)
        lt = len(text)
        adj = lt if after else 0

        def poses(li, a=0, e=None):
            line = lines[li]
            if e is None:
                e = len(line)
            i = line.rfind(text, a, e)
            while i >= 0:
                yield Pos(li, i + adj)
                # The next search must end where this occurrence
                # starts; for an empty string step back one column
                # so the same spot is not matched forever.
                stop = i if lt else i - 1
                if stop < a:
                    return
                i = line.rfind(text, a, stop)

        pl, pi = pos
        ql, qi = end
        if pl == ql:
            yield from poses(pl, pi, qi)
            return
        if ql < nl and qi > 0:
            yield from poses(ql, 0, qi)
        for li in range(ql - 1, pl, -1):
            yield from poses(li)
        yield from poses(pl, pi)

    def find_match(self, ptn, pos=None, end=None, nf=NO_MATCH):
        """
        Returns the first line number and regex match for
        the given pattern between the given (optional)
        positions, or returns nf (NO_MATCH by default).
        Each line is searched separately.
        """
        ptn = _re.compile(ptn)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        lines = self.lines
        nl = len(lines)
        pl, pi = pos
        ql, qi = end
        if pl == ql:
            m = ptn.search(lines[pl], pi, qi)
            return LineMatch(pl, m) if m else nf
        m = ptn.search(lines[pl], pi)
        if m:
            return LineMatch(pl, m)
        # The first line has already been searched from its start
        # column, so the whole-line scan begins on the next line.
        for li in range(pl + 1, ql):
            m = ptn.search(lines[li])
            if m:
                return LineMatch(li, m)
        if ql < nl and qi > 0:
            m = ptn.search(lines[ql], 0, qi)
            if m:
                return LineMatch(ql, m)
        return nf

    def match_iter(self, ptn, pos=None, end=None):
        """
        Generates all the line number/regex matches for
        the given pattern between the given (optional)
        positions. Each line is searched separately.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        lines = self.lines
        nl = len(lines)
        ptn = _re.compile(ptn)

        def lms(li, *span):
            return (LineMatch(li, m)
                    for m in ptn.finditer(lines[li], *span))

        pl, pi = pos
        ql, qi = end
        if pl == ql:
            yield from lms(pl, pi, qi)
            return
        yield from lms(pl, pi)
        for li in range(pl + 1, ql):
            yield from lms(li)
        if ql < nl and qi > 0:
            yield from lms(ql, 0, qi)

    def sub(self, old, new, pos=None, end=None):
        """
        Replaces the old substring with the new between
        the specified (optional) positions.
        With no positions given, returns the result of sub_all;
        otherwise returns None.
        """
        if pos is None and end is None:
            return self.sub_all(old, new)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        if pos <= START and end.line >= len(self):
            self.sub_all(old, new)
            return
        if pos.line == end.line:
            self.sub_in_line(old, new, pos.line, pos.column, end.column)
            return
        lines = self.lines
        nl = len(lines)
        # First line: only the part from the start column onwards.
        first = lines[pos.line]
        lines[pos.line] = (first[:pos.column]
                           + first[pos.column:].replace(old, new))
        for li in range(pos.line + 1, end.line):
            lines[li] = lines[li].replace(old, new)
        # Last line: only the part before the end column.
        if end.line < nl and end.column > 0:
            last = lines[end.line]
            lines[end.line] = (last[:end.column].replace(old, new)
                               + last[end.column:])

    def sub_in_line(self, old, new, li, a, e):
        """
        Replaces the old substring with the new one in
        the specified line between the specified start and
        end positions.
        """
        a = self.lines[li].find(old, a, e)
        if a >= 0:
            self.modify_chunk(li, a, e, lambda s: s.replace(old, new))

    def sub_all(self, old, new):
        """
        Replaces all occurrences of the old substring with
        the new one. Returns true if any replacements were
        made.
        """
        lines = self.lines
        changed = False
        for li, line in enumerate(lines):
            if old in line:
                lines[li] = line.replace(old, new)
                changed = True
        return changed

    def resub(self, ptn, new, pos=None, end=None):
        """
        Performs a regex substitution between the
        specified (optional) positions.
        When the range covers the whole text, returns the result
        of resub_all; otherwise returns None. The pattern is
        applied separately to the selected part of each line.
        """
        if pos is None and end is None:
            return self.resub_all(ptn, new)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        if pos <= START and end >= self.end:
            return self.resub_all(ptn, new)
        ptn = _re.compile(ptn)
        lines = self.lines
        nl = len(lines)
        subf = partial(ptn.sub, new)
        if pos.line == end.line:
            return self.modify_chunk(pos.line, pos.column, end.column, subf)
        # First line: only the part from the start column onwards.
        first = lines[pos.line]
        lines[pos.line] = first[:pos.column] + subf(first[pos.column:])
        for li in range(pos.line + 1, end.line):
            lines[li] = subf(lines[li])
        # Last line: only the part before the end column.
        if end.line < nl and end.column > 0:
            last = lines[end.line]
            lines[end.line] = subf(last[:end.column]) + last[end.column:]

    def resub_all(self, ptn, new):
        """
        Performs a regex substitution in every line
        of the text. Returns true if any replacements
        were performed.
        """
        ptn = _re.compile(ptn)
        lines = self.lines
        found = False
        for i, line in enumerate(lines):
            replaced, count = ptn.subn(new, line)
            if count:
                lines[i] = replaced
                found = True
        return found

    def has_which(self, strings, p):
        """
        Which of the given strings occurs at the
        given position? Returns None if none do.
        Raises an IndexError if the position is out
        of the appropriate range.
        """
        p = START if p is None else self.check_pos(p)
        line = self.lines[p.line]
        return next((x for x in strings if line.startswith(x, p.column)),
                    None)

    def show(self, start, end=None):
        """
        Prints the text between start and end, with the whitespace
        common to the start of the printed lines and any trailing
        whitespace removed.
        If end is None, prints from start to the end of its line.
        Prints nothing if the range is empty.
        """
        start = self.wrap_start(start)
        if end is None:
            end = Pos(start.line + 1, 0)
        else:
            end = self.wrap_end(end)
        if start >= end:
            return
        all_lines = self.lines
        if start.line == end.line:
            # A range inside one line must honour both columns.
            chunk = [all_lines[start.line][start.column:end.column]]
        else:
            chunk = all_lines[start.line:end.line]
            if chunk and start.column > 0:
                chunk[0] = chunk[0][start.column:]
            if end.column > 0:
                chunk.append(all_lines[end.line][:end.column])
        ws = count_ws(chunk)
        for line in chunk:
            print(line[ws:].rstrip())

    def _line_range(self, start, end):
        """
        Resolves optional start and end line indexes into a range
        of line numbers, wrapping each against the line count.
        """
        n = len(self.lines)
        first = 0 if start is None else wrap(start, n)
        last = n if end is None else wrap(end, n)
        return range(first, last)

    def grep(self, string, start=None, end=None):
        """
        Prints the number and stripped text of every line between
        the (optional) start and end line indexes that contains
        the given string.
        """
        lines = self.lines
        for i in self._line_range(start, end):
            line = lines[i]
            if string in line:
                print(i, line.strip())

    def regrep(self, ptn, start=None, end=None):
        """
        Prints the number and stripped text of every line between
        the (optional) start and end line indexes in which the
        given regex pattern is found.
        """
        ptn = _re.compile(ptn)
        lines = self.lines
        for i in self._line_range(start, end):
            line = lines[i]
            if ptn.search(line):
                print(i, line.strip())

    @classmethod
    def load(cls, filename, encoding='utf8'):
        """Loads the contents of a file into an editor."""
        with open(filename, 'r', encoding=encoding) as fin:
            return cls(fin.read())

    def save(self, filename, encoding='utf8'):
        """
        Saves the contents of an editor into a file,
        with a newline between lines.
        """
        with open(filename, 'w', encoding=encoding) as fout:
            write_strings(fout, self.lines, '\n')
def write_strings(f, strings, sep):
    """
    Writes the given strings to the file object f, with sep written
    between consecutive strings (not before the first or after the
    last). Writes nothing if there are no strings.
    strings may be any iterable, including a one-shot iterator.
    """
    it = iter(strings)
    # A sentinel distinguishes "no items" from any real value; a
    # truthiness test on the argument is wrong for iterators, which
    # are truthy even when empty.
    missing = object()
    first = next(it, missing)
    if first is missing:
        return
    f.write(first)
    for s in it:
        f.write(sep)
        f.write(s)
def main():
    """
    Small demonstration: builds an editor with one Greek letter
    name per line, then prints the results of a forward and a
    backward string search and of a case-insensitive regex search.
    """
    words = ('Alpha beta gamma delta epsilon zeta eta '
             'theta iota kappa lambda mu nu omicron pi rho sigma '
             'tau upsilon phi chi psi omega')
    x = Editor(words.replace(' ', '\n'))
    # Forward search first, then backward search.
    for search in (x.find, x.rfind):
        y = search('ma')
        print(y)
        print(x[y])
        print(x[y[0]])
    pattern = _re.compile('m.A', flags=_re.I)
    print(x.find_match(pattern))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment