Created
October 26, 2025 22:32
-
-
Save khelwood/16fda7869b5fdb0f3fce80f98e083705 to your computer and use it in GitHub Desktop.
Tool for doing lots of text replacements in different ways
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import re | |
| from collections import defaultdict | |
class PtnReps:
    """Apply several regex replacements in a single pass over a string.

    Every pattern in ``reps`` is wrapped in its own named group (``g0``,
    ``g1``, ...) and the groups are joined into one alternation compiled
    with ``flags``.  When the combined pattern matches, the named group
    that took part in the match selects the replacement to use.

    Because each pattern is wrapped in an extra group, numbered groups
    and numeric backreferences inside a pattern are offset from what they
    would be if the pattern were compiled on its own.

    Attributes:
        ptn: The compiled alternation of all patterns.
        reps: Mapping from generated group name to replacement.  A
            replacement is either a string (used literally) or a callable
            that receives the matched text and returns the new text.
    """

    def __init__(self, flags, reps):
        """Compile ``reps`` (pattern string -> replacement) with ``flags``."""
        parts = []
        by_group = {}
        for index, (pattern, replacement) in enumerate(reps.items()):
            name = 'g%d' % index
            parts.append('(?P<%s>%s)' % (name, pattern))
            by_group[name] = replacement
        self.ptn = re.compile('|'.join(parts), flags=flags)
        self.reps = by_group

    def repl(self, match):
        """Return the replacement text for ``match``.

        Raises:
            ValueError: If none of this object's groups took part in the
                match.
        """
        groups = match.groupdict()
        # Only look at the groups generated in __init__; named groups that
        # a user pattern defines itself are not replacement keys.
        for name, replacement in self.reps.items():
            text = groups.get(name)
            if text is not None:
                if isinstance(replacement, str):
                    return replacement
                return replacement(text)
        raise ValueError('No replacement found for %r.' % (match,))

    def __call__(self, string):
        """Return ``string`` with every match replaced."""
        if not self.reps:
            # With no patterns the alternation is the empty regex, which
            # matches at every position yet has no replacement to offer.
            return string
        return self.ptn.sub(self.repl, string)
class CombinedReplace:
    """Collect many text replacements and apply them together.

    Replacements are stored per regex-flags value.  On ``build`` each
    flags group is compiled into one ``PtnReps``; calling the object
    applies those groups one after another, in the order their flags
    values were first used.

    An instance created through ``with_args`` is a *child*: it stores no
    replacements itself and forwards every operation to its master,
    merging in its own default options.

    Supported options (constructor defaults, overridable per call):
        escape: If true, the pattern is passed through ``re.escape``.
        whole_word: If true, the pattern is wrapped in ``\\b`` anchors.
        match_caps: If true, the replacement is wrapped with
            ``apply_match_caps``.
        flags: ``int`` or ``re.RegexFlag`` used to compile the pattern.
    """

    def __init__(self, master=None, **kwargs):
        """Create a replacer with default options ``kwargs``.

        Raises:
            TypeError: If an unknown option is given or ``flags`` has the
                wrong type.
        """
        self._master = master
        self._check_kwargs(**kwargs)
        self.kwargs = kwargs
        self.flagreplacements = {}
        self._ops = None

    def with_args(self, **kwargs):
        """Return a child replacer whose defaults are extended by ``kwargs``."""
        master = self if self._master is None else self._master
        kw = self.kwargs | kwargs
        return CombinedReplace(master, **kw)

    def _check_kwargs(self, escape=None, whole_word=None, match_caps=None, flags=0):
        """Validate option names (via the signature) and the type of ``flags``."""
        t = type(flags)
        if t not in (int, re.RegexFlag):
            raise TypeError("flags should be int or RegexFlag, not "+t.__name__)

    def _store(self, pattern, repl, flags):
        """Record ``pattern -> repl`` under ``flags`` and drop compiled state."""
        self.flagreplacements.setdefault(flags, {})[pattern] = repl
        # Anything compiled earlier no longer reflects the stored
        # replacements; force a rebuild on the next call.
        self._ops = None

    def add(self, string, repl, **kwargs):
        """Register a replacement of ``string`` by ``repl``; return ``self``.

        ``repl`` is a string or a callable taking the matched text.
        ``kwargs`` override this object's default options for this call.
        """
        self._check_kwargs(**kwargs)
        kw = self.kwargs | kwargs
        if self._master is not None:
            self._master.add(string, repl, **kw)
            return self
        if kw.get('escape', False):
            string = re.escape(string)
        if kw.get('whole_word', False):
            string = r'\b' + string + r'\b'
        if kw.get('match_caps', False):
            repl = apply_match_caps(repl)
        self._store(string, repl, kw.get('flags', 0))
        return self

    def add_look(self, before, target, after, repl,
                 behind_neg=False, ahead_neg=False, **kwargs):
        """Register a replacement of ``target`` guarded by lookarounds.

        ``before`` and ``after`` are handed to ``build_look_pattern`` as
        the lookbehind and lookahead contents; ``behind_neg`` and
        ``ahead_neg`` make the respective assertion negative.  Only
        ``target`` is affected by the ``escape`` option, and the
        ``whole_word`` option is not applied here.  Returns ``self``.
        """
        self._check_kwargs(**kwargs)
        kw = self.kwargs | kwargs
        if self._master is not None:
            # The negation switches are not part of ``kw`` and must be
            # forwarded explicitly, otherwise the master would silently
            # build positive assertions.
            self._master.add_look(before, target, after, repl,
                                  behind_neg=behind_neg, ahead_neg=ahead_neg,
                                  **kw)
            return self
        if kw.get('escape', False):
            target = re.escape(target)
        if kw.get('match_caps', False):
            repl = apply_match_caps(repl)
        ptn = build_look_pattern(before, target, after, behind_neg, ahead_neg)
        self._store(ptn, repl, kw.get('flags', 0))
        return self

    def add_string(self, string, repl, **kwargs):
        """Like ``add`` with ``escape`` defaulting to true."""
        kw = {'escape': True, **kwargs}
        return self.add(string, repl, **kw)

    def add_word(self, string, repl, **kwargs):
        """Like ``add`` with ``whole_word`` defaulting to true."""
        kw = {'whole_word': True, **kwargs}
        return self.add(string, repl, **kw)

    def add_pattern(self, ptn, repl, **kwargs):
        """Like ``add`` with ``escape`` defaulting to false.

        ``ptn`` may be a pattern string or a compiled pattern; for a
        compiled pattern only its source text is used.
        """
        kw = {'escape': False, **kwargs}
        if isinstance(ptn, re.Pattern):
            ptn = ptn.pattern
        return self.add(ptn, repl, **kw)

    def build(self):
        """Compile the stored replacements; return the object that holds them."""
        if self._master is not None:
            return self._master.build()
        self._ops = [PtnReps(flags, reps)
                     for flags, reps in self.flagreplacements.items()]
        return self

    def __call__(self, string):
        """Return ``string`` with all registered replacements applied."""
        if self._master is not None:
            return self._master(string)
        if self._ops is None:
            self.build()
        for op in self._ops:
            string = op(string)
        return string
def look_pattern(s, behind, neg):
    """Build a lookaround assertion for ``s``.

    Args:
        s: A pattern string, or an iterable of pattern strings that are
            alternatives.  The strings are inserted as-is (not escaped).
        behind: True for a lookbehind, False for a lookahead.
        neg: True for a negative assertion, False for a positive one.

    Returns:
        The regex source of the assertion.  Alternatives are grouped by
        string length and each length gets its own assertion, since the
        ``re`` module only accepts fixed-width lookbehinds.
    """
    if behind:
        tpt = '(?<!%s)' if neg else '(?<=%s)'
    else:
        tpt = '(?!%s)' if neg else '(?=%s)'
    if isinstance(s, str):
        return tpt % (s,)
    by_length = {}
    for alternative in s:
        by_length.setdefault(len(alternative), []).append(alternative)
    assertions = [tpt % ('|'.join(alts),) for alts in by_length.values()]
    if len(assertions) == 1:
        return assertions[0]
    if neg:
        # "None of the alternatives" means every negative assertion must
        # hold, so they are concatenated (AND).  Joining them with '|'
        # would succeed as soon as any single alternative is absent.
        return ''.join(assertions)
    return '(?:%s)' % ('|'.join(assertions),)
def group_to_lists(items, fn):
    """Bucket ``items`` by ``fn(item)``.

    Returns a ``defaultdict(list)`` mapping each key to the items that
    produced it, in the order they were encountered.
    """
    buckets = defaultdict(list)
    for element in items:
        key = fn(element)
        buckets[key].append(element)
    return buckets
def build_look_pattern(before, target, after, before_neg, after_neg):
    """Assemble ``lookbehind + target + lookahead`` as one regex string.

    A falsy ``before`` or ``after`` contributes no assertion, and a falsy
    ``target`` contributes the empty string.  ``before_neg`` and
    ``after_neg`` select negative assertions.
    """
    pieces = []
    if before:
        pieces.append(look_pattern(before, True, before_neg))
    pieces.append(target if target else '')
    if after:
        pieces.append(look_pattern(after, False, after_neg))
    return ''.join(pieces)
def apply_match_caps(repl):
    """Wrap ``repl`` so its output takes on the case of the matched text.

    A string without any cased characters is returned unchanged.  Any
    other string, and any callable, is wrapped in a function that takes
    the matched text and passes the replacement through ``copy_case``.
    """
    if not isinstance(repl, str):
        def recase_computed(matched):
            return copy_case(matched, repl(matched))
        return recase_computed

    has_cased_char = any(ch.lower() != ch.upper() for ch in repl)
    if not has_cased_char:
        return repl

    def recase_fixed(matched):
        return copy_case(matched, repl)
    return recase_fixed
def copy_case(src, dst):
    """Return ``dst`` re-cased to follow the capitalisation of ``src``.

    All-lowercase, title-case and all-uppercase sources are handled with
    the corresponding ``str`` method (a title-case source without a space
    only capitalises the first letter).  Otherwise the case is copied
    position by position; positions where ``src`` has no cased character,
    or that lie past its end, reuse the most recent case seen.
    """
    if src.islower():
        return dst.lower()
    if src.istitle():
        if ' ' in src:
            return dst.title()
        return dst.capitalize()
    if src.isupper():
        return dst.upper()

    # Mixed case: walk dst while tracking the case of the aligned src char.
    src_len = len(src)
    mode = 0  # 1 = upper, -1 = lower, 0 = nothing seen yet (leave as is)
    pieces = []
    for pos, char in enumerate(dst):
        if pos < src_len:
            model = src[pos]
            if model.isupper():
                mode = 1
            elif model.islower():
                mode = -1
        if mode == 1:
            pieces.append(char.upper())
        elif mode == -1:
            pieces.append(char.lower())
        else:
            pieces.append(char)
    return ''.join(pieces)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment