# Given an Excel file, removes struck-out text
# Emanuele Ruffaldi MMI 2025
# requires openpyxl
# Tested with 3.10.12 and openpyxl 3.1.4
#
import sys
from openpyxl import load_workbook
from openpyxl.styles import Font
from openpyxl.cell.rich_text import Text, CellRichText
import shutil


def _drop_struck_runs(runs, cell_strike):
    """Return the elements of *runs* that are not struck out.

    *runs* is the list form of a CellRichText: a mix of plain ``str``
    elements and run objects exposing a ``font`` attribute.  A plain ``str``
    is treated as struck exactly when the cell font itself is struck
    (*cell_strike*); any other run is struck only when its own font has
    ``strike`` set.
    """
    def is_struck(run):
        if isinstance(run, str):
            return bool(cell_strike)
        return bool(run.font and run.font.strike)

    return [run for run in runs if not is_struck(run)]


def remove_strikeout_text(wb):
    """Remove struck-out content from every worksheet of *wb*, in place.

    For rich-text cells only the struck-out runs are dropped; for plain
    cells whose font is struck the whole value is cleared.  The strike flag
    is then removed from the cell font.

    Returns True if any cell value or font was modified, False otherwise.
    """
    changed = False
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                cellstrike = cell.font and cell.font.strike
                if isinstance(cell.value, CellRichText):
                    runs = list(cell.value)  # .as_list() will return str
                    kept = _drop_struck_runs(runs, cellstrike)
                    if len(kept) != len(runs):
                        # Changed to: empty, simple string, CellRichText
                        if not kept:
                            cell.value = ""
                        elif all(isinstance(run, str) for run in kept):
                            cell.value = "".join(kept)
                        else:
                            cell.value = CellRichText(kept)
                        changed = True
                elif cell.value is not None and cellstrike:
                    # Explicit None test (not truthiness) so that struck-out
                    # falsy values such as 0 or False are cleared as well.
                    # Make it empty string for string, None for other types
                    if isinstance(cell.value, str):
                        cell.value = ""
                    else:
                        cell.value = None
                    changed = True
                # remove strike from font anyway
                if cellstrike:
                    # Rebuild the font from its current attributes, with
                    # only the strike flag overridden.
                    cell.font = Font(**{**cell.font.__dict__, "strike": False})
                    changed = True
    return changed


def main():
    """Command-line entry point: ``inputfile [outputfile]``.

    When no output file is given, the result is written next to the input
    as ``<inputfile>.cleaned.xlsx``.  If nothing was struck out, the input
    is copied verbatim instead of being re-saved through openpyxl.

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(sys.argv) == 1:
        print("Expected: inputfile [outputfile]", file=sys.stderr)
        return 2

    infile = sys.argv[1]
    outfile = sys.argv[2] if len(sys.argv) >= 3 else infile + ".cleaned.xlsx"
    wb = load_workbook(infile, rich_text=True)
    if remove_strikeout_text(wb):
        wb.save(outfile)
    else:
        # copy2 does full metadata
        print("Full copy no strike", file=sys.stderr)
        try:
            shutil.copy2(infile, outfile)
        except shutil.SameFileError:
            # Input and output are the same file, which already is the
            # desired (unchanged) result.
            pass
    return 0


if __name__ == "__main__":
    sys.exit(main())