Skip to content

Instantly share code, notes, and snippets.

@khelwood
Created October 26, 2025 22:32
Show Gist options
  • Select an option

  • Save khelwood/16fda7869b5fdb0f3fce80f98e083705 to your computer and use it in GitHub Desktop.

Select an option

Save khelwood/16fda7869b5fdb0f3fce80f98e083705 to your computer and use it in GitHub Desktop.
Tool for doing lots of text replacements in different ways
#!/usr/bin/env python3
import re
from collections import defaultdict
class PtnReps:
    """Apply several regex replacements in one pass over a string.

    Every pattern in ``reps`` is wrapped in its own named group (``g0``,
    ``g1``, ...) and the groups are joined with ``|`` into a single compiled
    regex. Alternatives are listed in the insertion order of ``reps``.
    """

    def __init__(self, flags, reps):
        """Compile the combined pattern.

        Args:
            flags: Regex flags passed to ``re.compile`` for the combined
                pattern.
            reps: Mapping of regex pattern string -> replacement. A
                replacement is either a ``str`` or a callable that takes the
                matched text and returns the replacement text.
        """
        parts = []
        group_reps = {}
        for k, v in reps.items():
            gp = 'g%s' % len(parts)
            parts.append('(?P<%s>%s)' % (gp, k))
            group_reps[gp] = v
        self.ptn = re.compile('|'.join(parts), flags=flags)
        # Maps generated group name -> replacement for that alternative.
        self.reps = group_reps

    def repl(self, match):
        """Return the replacement text for ``match``.

        The first named group that took part in the match selects the
        replacement. String replacements are returned as they are; callables
        are invoked with the text matched by that group.

        Raises:
            ValueError: If no named group took part in the match.
        """
        reps = self.reps
        for g, s in match.groupdict().items():
            if s is not None:
                r = reps[g]
                return r if isinstance(r, str) else r(s)
        # The original referenced an undefined name here, which turned this
        # error path into a NameError.
        raise ValueError('No replacement found for %r.' % (match,))

    def __call__(self, string):
        """Return ``string`` with every match of the combined pattern replaced."""
        return self.ptn.sub(self.repl, string)
class CombinedReplace:
    """Collect many text replacements and apply them to strings.

    Replacements are stored per regex-flags value in ``flagreplacements``
    (``{flags: {pattern: replacement}}``). When built, each flags group
    becomes one ``PtnReps`` operation; calling the instance runs those
    operations one after another.

    Recognised keyword options (given to the constructor as defaults, or per
    call to the ``add*`` methods):

    * ``escape``: pass the search string through ``re.escape``.
    * ``whole_word``: surround the pattern with ``\\b`` (used by ``add``).
    * ``match_caps``: wrap the replacement with ``apply_match_caps``.
    * ``flags``: regex flags (``int`` or ``re.RegexFlag``).

    An instance created by ``with_args`` is a view with different default
    options: it forwards every addition, build and call to its master.
    """

    def __init__(self, master=None, **kwargs):
        """Create a replacer.

        Args:
            master: Instance that stores the replacements, or ``None`` if
                this instance stores them itself.
            **kwargs: Default options for later ``add*`` calls.

        Raises:
            TypeError: If ``flags`` is not an ``int`` or ``re.RegexFlag``,
                or an unknown option name is given.
        """
        self._master = master
        self._check_kwargs(**kwargs)
        self.kwargs = kwargs
        self.flagreplacements = {}
        # Built list of operations; None means "not built / out of date".
        self._ops = None

    def with_args(self, **kwargs):
        """Return a view on the same replacement store with extra defaults."""
        master = self if self._master is None else self._master
        kw = self.kwargs | kwargs
        return CombinedReplace(master, **kw)

    def _check_kwargs(self, escape=None, whole_word=None, match_caps=None, flags=0):
        """Validate option names (via the signature) and the type of ``flags``."""
        t = type(flags)
        if t not in (int, re.RegexFlag):
            raise TypeError("flags should be int or RegexFlag, not "+t.__name__)

    def _store(self, flags, ptn, repl):
        """Record ``ptn -> repl`` under ``flags`` and drop any stale build."""
        self.flagreplacements.setdefault(flags, {})[ptn] = repl
        # Without this, additions made after the first build/call would be
        # silently ignored because __call__ only builds when _ops is None.
        self._ops = None

    def add(self, string, repl, **kwargs):
        """Register a replacement of ``string`` (a regex unless escaped).

        Args:
            string: Pattern text to search for.
            repl: Replacement string or callable taking the matched text.
            **kwargs: Options overriding this instance's defaults.

        Returns:
            This instance, so calls can be chained.
        """
        self._check_kwargs(**kwargs)
        kw = self.kwargs | kwargs
        if self._master is not None:
            self._master.add(string, repl, **kw)
            return self
        if kw.get('escape', False):
            string = re.escape(string)
        if kw.get('whole_word', False):
            string = r'\b' + string + r'\b'
        if kw.get('match_caps', False):
            repl = apply_match_caps(repl)
        self._store(kw.get('flags', 0), string, repl)
        return self

    def add_look(self, before, target, after, repl,
                 behind_neg=False, ahead_neg=False, **kwargs):
        """Register a replacement of ``target`` constrained by lookarounds.

        Args:
            before: Lookbehind content (string or iterable of strings), or a
                falsy value for none.
            target: Pattern text to replace; only this part is escaped when
                ``escape`` is set.
            after: Lookahead content (string or iterable of strings), or a
                falsy value for none.
            repl: Replacement string or callable taking the matched text.
            behind_neg: Make the lookbehind negative.
            ahead_neg: Make the lookahead negative.
            **kwargs: Options overriding this instance's defaults. Note that
                ``whole_word`` is accepted but not applied by this method.

        Returns:
            This instance, so calls can be chained.
        """
        self._check_kwargs(**kwargs)
        kw = self.kwargs | kwargs
        if self._master is not None:
            # Forward the negation switches too; dropping them would turn a
            # negative lookaround into a positive one on the master.
            self._master.add_look(before, target, after, repl,
                                  behind_neg=behind_neg, ahead_neg=ahead_neg,
                                  **kw)
            return self
        if kw.get('escape', False):
            target = re.escape(target)
        if kw.get('match_caps', False):
            repl = apply_match_caps(repl)
        ptn = build_look_pattern(before, target, after, behind_neg, ahead_neg)
        self._store(kw.get('flags', 0), ptn, repl)
        return self

    def add_string(self, string, repl, **kwargs):
        """Like ``add`` but with ``escape=True`` unless overridden."""
        kw = {'escape':True, **kwargs}
        return self.add(string, repl, **kw)

    def add_word(self, string, repl, **kwargs):
        """Like ``add`` but with ``whole_word=True`` unless overridden."""
        kw = {'whole_word':True, **kwargs}
        return self.add(string, repl, **kw)

    def add_pattern(self, ptn, repl, **kwargs):
        """Like ``add`` but with ``escape=False`` unless overridden.

        A compiled ``re.Pattern`` is reduced to its pattern text; the flags
        it was compiled with are not carried over.
        """
        kw = {'escape':False, **kwargs}
        if isinstance(ptn, re.Pattern):
            ptn = ptn.pattern
        return self.add(ptn, repl, **kw)

    def build(self):
        """Compile one ``PtnReps`` per flags group; returns the storing instance."""
        if self._master is not None:
            return self._master.build()
        self._ops = [PtnReps(flags, reps) for flags, reps in self.flagreplacements.items()]
        return self

    def __call__(self, string):
        """Apply all registered replacements to ``string`` and return the result."""
        if self._master is not None:
            return self._master(string)
        if self._ops is None:
            self.build()
        for op in self._ops:
            string = op(string)
        return string
def look_pattern(s, behind, neg):
    """Build a lookaround assertion for one or more alternative patterns.

    Args:
        s: A pattern string, or an iterable of pattern strings that are
            alternatives.
        behind: True for a lookbehind, False for a lookahead.
        neg: True for a negative assertion, False for a positive one.

    Returns:
        The regex text of the assertion. For an iterable, alternatives are
        grouped by the length of their pattern text and each group becomes
        one assertion (presumably because ``re`` lookbehinds must be
        fixed-width; note the text length only equals the matched width for
        plain literals). Positive groups are OR-ed together; negative groups
        are concatenated so that none of the alternatives may match.
    """
    if behind:
        tpt = '(?<!%s)' if neg else '(?<=%s)'
    else:
        tpt = '(?!%s)' if neg else '(?=%s)'
    if isinstance(s, str):
        return tpt % s
    by_len = {}
    for alt in s:
        by_len.setdefault(len(alt), []).append(alt)
    gps = [tpt % ('|'.join(vs)) for vs in by_len.values()]
    if len(gps) == 1:
        return gps[0]
    if neg:
        # "Not A and not B": OR-ing negative assertions would succeed
        # whenever any single alternative fails, which is almost always.
        return ''.join(gps)
    return '(?:%s)' % ('|'.join(gps))
def group_to_lists(items, fn):
    """Bucket ``items`` by the value of ``fn(item)``.

    Returns a ``defaultdict(list)`` mapping each key to the items that
    produced it, in the order they were encountered.
    """
    buckets = defaultdict(list)
    for element in items:
        key = fn(element)
        buckets[key].append(element)
    return buckets
def build_look_pattern(before, target, after, before_neg, after_neg):
    """Assemble ``lookbehind + target + lookahead`` as one regex string.

    A falsy ``before`` or ``after`` contributes no assertion, and a falsy
    ``target`` contributes an empty string. ``before_neg`` / ``after_neg``
    select negative assertions for the respective side.
    """
    pieces = []
    if before:
        pieces.append(look_pattern(before, True, before_neg))
    pieces.append(target or '')
    if after:
        pieces.append(look_pattern(after, False, after_neg))
    return ''.join(pieces)
def apply_match_caps(repl):
    """Wrap ``repl`` so its result takes on the casing of the matched text.

    A string with no cased characters (``lower()`` equals ``upper()`` for
    every character) is returned unchanged. Otherwise a function of the
    matched text is returned that passes the replacement through
    ``copy_case``; a callable ``repl`` is first invoked with the matched text.
    """
    if not isinstance(repl, str):
        return lambda s: copy_case(s, repl(s))
    has_cased_char = any(ch.lower() != ch.upper() for ch in repl)
    if not has_cased_char:
        return repl
    return lambda s: copy_case(s, repl)
def copy_case(src, dst):
    """Return ``dst`` re-cased to follow the casing of ``src``.

    * all-lowercase ``src`` -> ``dst.lower()``
    * title-case ``src`` -> ``dst.title()`` if ``src`` contains a space,
      otherwise ``dst.capitalize()``
    * all-uppercase ``src`` -> ``dst.upper()``
    * anything else -> case is copied position by position; positions where
      ``src`` has no cased character, or that lie beyond the end of ``src``,
      reuse the most recent case seen (characters before any cased ``src``
      character are left untouched).
    """
    if src.islower():
        return dst.lower()
    if src.istitle():
        if ' ' in src:
            return dst.title()
        return dst.capitalize()
    if src.isupper():
        return dst.upper()

    src_len = len(src)
    pieces = []
    mode = 0  # 1: upper-case, -1: lower-case, 0: no case seen yet
    for pos, char in enumerate(dst):
        if pos < src_len:
            guide = src[pos]
            if guide.isupper():
                mode = 1
            elif guide.islower():
                mode = -1
        if mode > 0:
            pieces.append(char.upper())
        elif mode < 0:
            pieces.append(char.lower())
        else:
            pieces.append(char)
    return ''.join(pieces)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment