Last active
August 27, 2023 11:34
-
-
Save khelwood/312d8066f03401a741ee128847ec9927 to your computer and use it in GitHub Desktop.
tool for editing text as a sequence of lines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Tool for editing a large block of text split into lines. | |
| This can make small changes much faster. | |
| Also provides lots of methods for searching and mutating | |
| the text. | |
| """ | |
| import re as _re | |
| from typing import NamedTuple, Match | |
| from functools import partial | |
class Pos(NamedTuple):
    """
    An immutable position made of a line and a column.

    The arithmetic operators work field by field against any
    two-item iterable; they do not concatenate tuples.
    """
    line: int
    column: int

    @classmethod
    def of(cls, p):
        """
        Converts p to a position. An instance of this class is
        returned as it is, an int n becomes (n, 0), and anything
        else is unpacked as (line, column).
        """
        if isinstance(p, cls):
            return p
        return cls(p, 0) if isinstance(p, int) else cls(*p)

    def __bool__(self):
        """A position is true unless its line field is negative."""
        return self.line >= 0

    def right(self, dx):
        """Returns the position dx columns along on the same line."""
        row, col = self
        return Pos(row, col + dx)

    def __add__(self, p):
        """Field-wise sum of this position and the pair p."""
        d_line, d_col = p
        return Pos(self.line + d_line, self.column + d_col)

    def __radd__(self, p):
        """Field-wise sum of the pair p and this position."""
        o_line, o_col = p
        return Pos(o_line + self.line, o_col + self.column)

    def __sub__(self, p):
        """Field-wise difference: this position minus the pair p."""
        d_line, d_col = p
        return Pos(self.line - d_line, self.column - d_col)

    def __rsub__(self, p):
        """Field-wise difference: the pair p minus this position."""
        o_line, o_col = p
        return Pos(o_line - self.line, o_col - self.column)


# Short alternative names for the two fields
Pos.li = Pos.line
Pos.l = Pos.line
Pos.col = Pos.column
Pos.i = Pos.column
class LineMatch(NamedTuple):
    """
    Pairs a line number with the regex match object found on that
    line. The match field may be None.
    """
    line: int
    match: Match

    def __bool__(self):
        """A line match is true unless its line field is negative."""
        return self.line >= 0


# Short alternative names for the two fields
LineMatch.li = LineMatch.line
LineMatch.l = LineMatch.line
LineMatch.m = LineMatch.match
# Position returned when a search finds nothing: line and column
# are both -1, so it tests as false.
NOT_FOUND = Pos(line=-1, column=-1)

# Position of the very start of the text: line 0, column 0.
START = Pos(line=0, column=0)

# Result of a regex search that finds nothing: the line is -1
# (so it tests as false) and there is no match object.
NO_MATCH = LineMatch(line=-1, match=None)
def wrap(i, n):
    """
    Normalises the index i against a length n, giving a value
    from 0 to n inclusive.
    A negative i counts back from n (as a sequence index does);
    whatever comes out is clipped so that 0 <= result <= n.
    """
    if i >= 0:
        return min(i, n)
    return max(0, i + n)
def count_ws(lines: list):
    """
    Returns the number of leading whitespace characters shared by
    every non-blank line in the given lines.

    Blank lines (empty, or made only of whitespace) are ignored
    wherever they occur, so they never reduce the result. The
    whitespace has to match character for character: a tab and a
    space are not treated as the same.
    Returns 0 if there are no non-blank lines.
    """
    content = [line for line in lines if line.strip()]
    if not content:
        return 0
    first = content[0]
    # Width of the first non-blank line's indentation; the shared
    # prefix can only get shorter from here.
    w = len(first) - len(first.lstrip())
    for line in content[1:]:
        if w == 0:
            break
        cw = 0
        while cw < w and cw < len(line) and line[cw] == first[cw]:
            cw += 1
        w = cw
    return w
| def modify_chunk(line:str, a:int, e:int, f): | |
| """ | |
| Alters a substring of a line, returning the result. | |
| line: string to alter | |
| a: start of region to alter | |
| e: end of region to alter | |
| f: string to replace region, or function to modify region | |
| """ | |
| new = f if isinstance(f, str) else f(line[a:e]) | |
| return line[:a] + new + line[e:] | |
class Editor:
    """
    A mutable sequence of lines comprising a block of text.

    Positions are (line, column) pairs; see Pos. Ranges are
    half-open: the start position is included, the end is not.
    Range arguments are passed through wrap(), so an int means
    column 0 of that line, negative indexes count from the end, and
    out-of-range values are clipped rather than raising.
    """

    def __init__(self, lines):
        """
        lines: a list of lines (used as it is, not copied), a string
        (split on newlines), or any other iterable of lines.
        """
        if isinstance(lines, list):
            self.lines = lines
        elif isinstance(lines, str):
            self.lines = lines.split('\n')
        else:
            self.lines = list(lines)

    def __len__(self):
        """The number of lines."""
        return len(self.lines)

    def __getitem__(self, p):
        """
        Gets the specified line(s) if p is an int or slice.
        Gets the specified character if p has two coordinates.
        """
        if isinstance(p, (int, slice)):
            return self.lines[p]
        pl, pi = self.check_pos(p)
        return self.lines[pl][pi]

    def __setitem__(self, p, value):
        """
        Sets the specified line(s) if p is an int or slice.
        Sets the specified character if p has two coordinates; the
        value is then a replacement string or a function applied to
        the existing character.
        """
        if isinstance(p, (int, slice)):
            self.lines[p] = value
        else:
            pl, pi = self.check_pos(p)
            self.modify_chunk(pl, pi, pi + 1, value)

    def check_pos(self, p):
        """
        Raises an IndexError if p is not the valid position
        of a character in this editor. If it's valid,
        it is returned as a Pos with non-negative fields.
        If p is an int it is expanded to (p,0), and only the line
        index is checked.
        """
        lines = self.lines
        n = len(lines)
        if isinstance(p, int):
            if 0 <= p < n:
                return Pos(p, 0)
            if -n <= p < 0:
                return Pos(p + n, 0)
            raise IndexError(f'invalid line index {p}')
        li, i = p
        if li < 0:
            li += n
        if not (0 <= li < n):
            raise IndexError(f'invalid line index in {p}')
        ll = len(lines[li])
        if 0 <= i < ll:
            return Pos(li, i)
        if -ll <= i < 0:
            return Pos(li, i + ll)
        raise IndexError(f'invalid column index in {p}')

    @property
    def end(self):
        """
        Gets the position of the end of the text
        (i.e. line=len(self), col=0)
        """
        return Pos(len(self.lines), 0)

    def __iter__(self):
        """Iterates the lines in the text."""
        return iter(self.lines)

    def modify_chunk(self, li, a, e, f):
        """
        Modifies the chunk from column a to column e of line li.
        f is the replacement string, or a function that is given the
        chunk and returns its replacement.
        """
        lines = self.lines
        lines[li] = modify_chunk(lines[li], a, e, f)

    def wrap(self, p):
        """
        Wraps p to a pos in this text.
        An int is expanded to (pos,0).
        A line index past the last line gives the end position;
        one before the first line gives START.
        See wrap(i, n) for details of wrapping.
        """
        lines = self.lines
        nl = len(lines)
        if isinstance(p, int):
            return Pos(wrap(p, nl), 0)
        li, i = p
        if li < 0:
            li += nl
        if li < 0:
            return START
        if li >= nl:
            return Pos(nl, 0)
        return Pos(li, wrap(i, len(lines[li])))

    def wrap_start(self, p):
        """If p is None, returns START; else wraps p."""
        return START if p is None else self.wrap(p)

    def wrap_end(self, p):
        """If p is None, returns self.end; else wraps p."""
        return self.end if p is None else self.wrap(p)

    def wrap_range(self, start, end):
        """Wraps the given start and end positions."""
        return self.wrap_start(start), self.wrap_end(end)

    def _spans(self, pos, end, reverse=False):
        """
        Generates (line index, start column, end column) for the
        part of each line lying between pos (incl) and end (excl).
        Both positions must already be wrapped, with pos < end.
        If reverse is true the lines are generated last to first.
        """
        lines = self.lines
        (pl, pi), (ql, qi) = pos, end
        if pl == ql:
            yield pl, pi, qi
            return
        # An end column of 0 (which includes the end-of-text
        # position) leaves nothing of the end line in the range.
        has_tail = ql < len(lines) and qi > 0
        if reverse:
            if has_tail:
                yield ql, 0, qi
            for li in range(ql - 1, pl, -1):
                yield li, 0, len(lines[li])
            yield pl, pi, len(lines[pl])
        else:
            yield pl, pi, len(lines[pl])
            for li in range(pl + 1, ql):
                yield li, 0, len(lines[li])
            if has_tail:
                yield ql, 0, qi

    def _line_range(self, start, end):
        """
        Gets the range of line numbers from start to end, where
        None means the first line / one past the last line and
        anything else is wrapped with wrap(i, n).
        """
        n = len(self)
        first = 0 if start is None else wrap(start, n)
        last = n if end is None else wrap(end, n)
        return range(first, last)

    def join(self):
        """Gets the lines of this text, joined with newlines."""
        return '\n'.join(self.lines)

    def insert(self, p, text):
        """
        Inserts text at the given position.
        If p is an int, inserts a new line (as list.insert does).
        If p has two coordinates, inserts the text at a
        point inside a line; the column may be anywhere from the
        start of the line to just after its last character.
        The end position (len(self), 0) appends a new line.
        Raises an IndexError for any other out-of-range position.
        """
        lines = self.lines
        if isinstance(p, int):
            lines.insert(p, text)
            return
        nl = len(lines)
        pl, pi = p
        if pl == nl and pi == 0:
            lines.append(text)
            return
        if pl < 0:
            pl += nl
        if not (0 <= pl < nl):
            raise IndexError(f'invalid line index in {p}')
        ll = len(lines[pl])
        if pi < 0:
            pi += ll
        # pi == ll is allowed: that is the insertion point after the
        # last character, and the only one an empty line has.
        if not (0 <= pi <= ll):
            raise IndexError(f'invalid column index in {p}')
        self.modify_chunk(pl, pi, pi, text)

    def delete(self, p, q):
        """
        Deletes text between p (incl) and q (excl).
        If p and q are ints, deletes the specified lines;
        otherwise deletes the text between the given
        positions, in one line or across multiple lines.
        Does nothing if the range is empty.
        """
        lines = self.lines
        nl = len(lines)
        if isinstance(p, int) and isinstance(q, int):
            del lines[wrap(p, nl):wrap(q, nl)]
            return
        p, q = self.wrap_range(p, q)
        if p >= q:
            return
        (pl, a), (ql, e) = p, q
        if pl == ql:
            self.modify_chunk(pl, a, e, '')
        elif ql >= nl:
            # Range runs to the end of the text: keep the head of
            # the first line and drop every line after it.
            lines[pl] = lines[pl][:a]
            del lines[pl + 1:]
        else:
            # Join the head of the first line to the tail of the
            # last, then drop the lines that were swallowed.
            lines[pl] = lines[pl][:a] + lines[ql][e:]
            del lines[pl + 1:ql + 1]

    def replace(self, p, q, text):
        """
        Replaces the specified range with the given text.
        An empty range (p == q) inserts the text at that position.
        Raises an IndexError if p is after q.
        """
        p, q = self.wrap_range(p, q)
        if p >= q:
            if p == q:
                return self.insert(p, text)
            raise IndexError('invalid range')
        lines = self.lines
        nl = len(lines)
        (pl, a), (ql, e) = p, q
        if pl == ql:
            self.modify_chunk(pl, a, e, text)
        elif ql >= nl:
            # Range runs to the end of the text: the first line
            # keeps its head plus the new text, the rest goes.
            lines[pl] = lines[pl][:a] + text
            del lines[pl + 1:]
        else:
            lines[pl] = lines[pl][:a] + text + lines[ql][e:]
            del lines[pl + 1:ql + 1]

    def __iadd__(self, value):
        """
        Adds the given lines to the end of this text. The value
        may be another editor, a string (split on newlines) or an
        iterable of lines.
        """
        if isinstance(value, Editor):
            value = value.lines
        elif isinstance(value, str):
            value = value.split('\n')
        self.lines.extend(value)
        return self

    def __add__(self, other):
        """
        Returns a new editor with this editor's lines
        followed by the given lines.
        """
        r = type(self)(list(self.lines))
        r += other
        return r

    def append(self, line: str):
        """Appends a line to this editor's lines."""
        self.lines.append(line)

    def resplit(self):
        """
        Following edits, splits any lines containing a
        newline character into individual lines.
        Returns true if any line had to be split.
        """
        lines = self.lines
        if not any('\n' in line for line in lines):
            return False
        # Slice assignment keeps the same list object, so anything
        # else holding a reference to it sees the change.
        lines[:] = [part for line in lines for part in line.split('\n')]
        return True

    def find(self, text, pos=None, end=None,
             *, after=False, nf=NOT_FOUND):
        """
        Finds the position of the first occurrence of the
        given string between the given (optional) positions,
        or returns nf (NOT_FOUND unless specified).
        If after is true, the position returned is that of the
        end of the occurrence instead of its start.
        Occurrences are only looked for within single lines.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        lines = self.lines
        adj = len(text) if after else 0
        for li, a, e in self._spans(pos, end):
            i = lines[li].find(text, a, e)
            if i >= 0:
                return Pos(li, i + adj)
        return nf

    def index(self, text, pos=None, end=None,
              *, after=False):
        """
        Returns the position of the first occurrence of the
        given string between the given (optional) positions.
        Raises a ValueError if not found.
        """
        p = self.find(text, pos, end, after=after)
        if not p:
            raise ValueError('string not found')
        return p

    def rfind(self, text, pos=None, end=None,
              *, after=False, nf=NOT_FOUND):
        """
        Returns the position of the last occurrence of the
        given string between the given (optional) positions,
        or returns nf (NOT_FOUND unless specified).
        If after is true, the position returned is that of the
        end of the occurrence instead of its start.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        lines = self.lines
        adj = len(text) if after else 0
        for li, a, e in self._spans(pos, end, reverse=True):
            i = lines[li].rfind(text, a, e)
            if i >= 0:
                return Pos(li, i + adj)
        return nf

    def rindex(self, text, pos=None, end=None,
               *, after=False):
        """
        Returns the position of the last occurrence of the
        given string between the given (optional) positions.
        Raises a ValueError if not found.
        """
        p = self.rfind(text, pos, end, after=after)
        if not p:
            raise ValueError('string not found')
        return p

    def find_iter(self, text, pos=None, end=None,
                  *, after=False):
        """
        Generates all the positions of occurrences
        of the given substring between the two
        (optional) positions. Occurrences do not overlap.
        If after is true, the end of each occurrence is given
        instead of its start.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        lines = self.lines
        lt = len(text)
        adj = lt if after else 0
        # Resume after each occurrence; always move on by at least
        # one column so an empty search string cannot loop forever.
        step = lt or 1
        for li, a, e in self._spans(pos, end):
            line = lines[li]
            i = line.find(text, a, e)
            while i >= 0:
                yield Pos(li, i + adj)
                i = line.find(text, i + step, e)

    def rfind_iter(self, text, pos=None, end=None,
                   *, after=False):
        """
        Generates all the positions of occurrences
        of the given substring between the two
        (optional) positions, starting from the end.
        Occurrences do not overlap.
        If after is true, the end of each occurrence is given
        instead of its start.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        lines = self.lines
        lt = len(text)
        adj = lt if after else 0
        for li, a, e in self._spans(pos, end, reverse=True):
            line = lines[li]
            i = line.rfind(text, a, e)
            while i >= 0:
                yield Pos(li, i + adj)
                # The next occurrence must end at or before the
                # start of this one. An empty search string has to
                # step back a column or it is found here forever.
                e = i if lt else i - 1
                if e < a:
                    # Also keeps e from going negative, which rfind
                    # would read as counting from the end.
                    break
                i = line.rfind(text, a, e)

    def find_match(self, ptn, pos=None, end=None, nf=NO_MATCH):
        """
        Returns the first line number and regex match for
        the given pattern between the given (optional)
        positions, or returns nf (NO_MATCH unless specified).
        The pattern may be a string or a compiled pattern, and is
        searched for in one line at a time.
        """
        ptn = _re.compile(ptn)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return nf
        lines = self.lines
        for li, a, e in self._spans(pos, end):
            m = ptn.search(lines[li], a, e)
            if m:
                return LineMatch(li, m)
        return nf

    def match_iter(self, ptn, pos=None, end=None):
        """
        Generates all the line number/regex matches for
        the given pattern between the given (optional)
        positions.
        """
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        ptn = _re.compile(ptn)
        lines = self.lines
        for li, a, e in self._spans(pos, end):
            for m in ptn.finditer(lines[li], a, e):
                yield LineMatch(li, m)

    def sub(self, old, new, pos=None, end=None):
        """
        Replaces the old substring with the new between
        the specified (optional) positions.
        With no positions given, returns the result of sub_all;
        otherwise returns None.
        """
        if pos is end is None:
            return self.sub_all(old, new)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        if pos <= START and end.l >= len(self):
            self.sub_all(old, new)
            return
        # The spans are listed before any line is changed, so the
        # column limits refer to the lines as they were.
        for li, a, e in list(self._spans(pos, end)):
            self.sub_in_line(old, new, li, a, e)

    def sub_in_line(self, old, new, li, a, e):
        """
        Replaces the old substring with the new one in
        the specified line between the specified start and
        end positions.
        """
        # Leave the line alone if there is nothing to replace.
        a = self.lines[li].find(old, a, e)
        if a >= 0:
            self.modify_chunk(li, a, e, lambda s: s.replace(old, new))

    def sub_all(self, old, new):
        """
        Replaces all occurrences of the old substring with
        the new one. Returns true if any replacements were
        made.
        """
        lines = self.lines
        changed = False
        for li, line in enumerate(lines):
            if old in line:
                lines[li] = line.replace(old, new)
                changed = True
        return changed

    def resub(self, ptn, new, pos=None, end=None):
        """
        Performs a regex substitution between the
        specified (optional) positions.
        When the range covers the whole text, returns the result
        of resub_all; otherwise returns None.
        In a partly covered line the pattern is applied to the
        covered part on its own, so it cannot see (or anchor
        against) the rest of that line.
        """
        if pos is end is None:
            return self.resub_all(ptn, new)
        pos, end = self.wrap_range(pos, end)
        if pos >= end:
            return
        if pos <= START and end >= self.end:
            return self.resub_all(ptn, new)
        subf = partial(_re.compile(ptn).sub, new)
        # The spans are listed before any line is changed, so the
        # column limits refer to the lines as they were.
        for li, a, e in list(self._spans(pos, end)):
            self.modify_chunk(li, a, e, subf)

    def resub_all(self, ptn, new):
        """
        Performs a regex substitution in every line
        of the text. Returns true if any replacements
        were performed.
        """
        ptn = _re.compile(ptn)
        lines = self.lines
        found = False
        for i, line in enumerate(lines):
            changed, count = ptn.subn(new, line)
            if count:
                lines[i] = changed
                found = True
        return found

    def has_which(self, strings, p):
        """
        Which of the given strings occurs at the
        given position? Returns the first that does, or None if
        none do. If p is None the start of the text is used.
        Raises an IndexError if the position is out
        of the appropriate range.
        """
        p = START if p is None else self.check_pos(p)
        line = self.lines[p.l]
        return next((x for x in strings if line.startswith(x, p.i)),
                    None)

    def show(self, start, end=None):
        """
        Prints the text between start and end, without the
        whitespace common to the start of all the printed lines
        and without trailing whitespace.
        If end is None, prints from start to the end of its line.
        """
        start = self.wrap_start(start)
        if end is None:
            end = Pos(start.l + 1, 0)
        else:
            end = self.wrap_end(end)
        if start.l == end.l:
            # Part of one line: both column limits apply to it.
            if start.i < end.i:
                lines = [self[start.l][start.i:end.i]]
            else:
                lines = []
        else:
            lines = self[start.l:end.l]
            if lines and start.i > 0:
                lines[0] = lines[0][start.i:]
            if end.i > 0:
                lines.append(self[end.l][:end.i])
        ws = count_ws(lines)
        for line in lines:
            print(line[ws:].rstrip())

    def grep(self, string, start=None, end=None):
        """
        Prints the number and stripped text of every line from
        line start to line end (excl) that contains the string.
        """
        for i in self._line_range(start, end):
            line = self[i]
            if string in line:
                print(i, line.strip())

    def regrep(self, ptn, start=None, end=None):
        """
        Prints the number and stripped text of every line from
        line start to line end (excl) in which the regex pattern
        is found.
        """
        ptn = _re.compile(ptn)
        for i in self._line_range(start, end):
            line = self[i]
            if ptn.search(line):
                print(i, line.strip())

    @classmethod
    def load(cls, filename, encoding='utf8'):
        """Loads the contents of a file into an editor."""
        with open(filename, 'r', encoding=encoding) as fin:
            return cls(fin.read())

    def save(self, filename, encoding='utf8'):
        """
        Saves the contents of an editor into a file, with a
        newline between lines and none after the last.
        """
        with open(filename, 'w', encoding=encoding) as fout:
            write_strings(fout, self.lines, '\n')
def write_strings(f, strings, sep):
    """
    Writes the strings to f with sep written between each
    adjacent pair (and not after the last).
    f: an object with a write method
    strings: any iterable of strings; nothing is written if it is
             falsy or turns out to be empty
    sep: the separator string
    """
    if not strings:
        return
    it = iter(strings)
    try:
        first = next(it)
    except StopIteration:
        # An iterator or generator is always truthy, so an empty
        # one can only be detected here.
        return
    f.write(first)
    for s in it:
        f.write(sep)
        f.write(s)
def main():
    """
    Demonstration: builds an editor with one Greek letter name per
    line, prints what find and rfind report for 'ma', then prints
    the result of a case-insensitive regex search.
    """
    words = ('Alpha beta gamma delta epsilon zeta eta '
             'theta iota kappa lambda mu nu omicron pi rho sigma '
             'tau upsilon phi chi psi omega')
    x = Editor(words.replace(' ', '\n'))
    for search in (x.find, x.rfind):
        y = search('ma')
        print(y)
        print(x[y])
        print(x[y[0]])
    p = x.find_match(_re.compile('m.A', flags=_re.I))
    print(p)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment