Last active
September 5, 2023 19:34
-
-
Save juanbretti/9dcc81b55323d59c8d36938e111c2e75 to your computer and use it in GitHub Desktop.
Reformat .md to better suit Obsidian formatting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # %% | |
| import os | |
| from datetime import datetime | |
| import csv | |
| import pandas as pd | |
| from pathlib import Path | |
# Inputs
# Root folder that is walked and modified in place by every step below.
# Raw string: the backslash is a Windows path separator, not an escape
# sequence ('\O' is an invalid escape that newer Pythons warn about).
directory = r'C:\OneNote'
# Maximum number of characters kept from a file or folder name before a
# "-<counter>" suffix is appended.
name_length = 15
def _iso_timestamp(line, label):
    """Return the timestamp that follows ``label`` in ``line`` as ISO 8601 text.

    ``line`` must look like ``'<label>2021-01-02 03:04:05 +0100'``;
    ``datetime.strptime`` raises ``ValueError`` when it does not.
    """
    raw = line.replace(label, '').strip()
    return datetime.strptime(raw, '%Y-%m-%d %H:%M:%S %z').isoformat()


def md_formatting(path):
    """Rewrite the Markdown note at ``path`` in place for Obsidian.

    The note is expected to start with a ``# Title`` line followed by a
    ``Created: ...`` line and a ``Modified: ...`` line. The function:

    1. Redirects ``media/`` links and ``src`` attributes to ``Attachments/``.
    2. Normalises blank lines: when double newlines are frequent (more than
       0.3 per remaining newline) every double newline is collapsed,
       otherwise only the blank lines at indices 1, 3 and 5 are removed.
    3. Replaces the title/Created/Modified header with a ``---`` block
       holding created, modified, migration, tags, project and alias.
    4. Adds ``Source::``, ``Parent::`` and ``Current::`` lines built from the
       path components, then repeats the title line.

    The new content is built entirely in memory and written once, so a note
    whose header cannot be parsed is left untouched instead of half-converted.

    Args:
        path: Location of the ``.md`` file (``str`` or path-like). It needs
            at least one parent folder component.

    Raises:
        ValueError: If the Created/Modified lines do not match
            ``%Y-%m-%d %H:%M:%S %z``.
        IndexError: If the note or the path has fewer lines/components than
            the layout described above.
    """
    import io

    with open(path, 'r', encoding='UTF-8') as file:
        content = file.read()

    double_newline_count = content.count('\n\n')
    single_newline_count = content.count('\n') - double_newline_count

    content = content.replace('](media/', '](Attachments/')
    content = content.replace('src="media/', 'src="Attachments/')

    # `single_newline_count` is zero only for a file without any newline;
    # guard the division so such a file fails on the header check instead.
    if single_newline_count and double_newline_count / single_newline_count > 0.3:
        content = content.replace('\n\n', '\n')
    else:
        lines = content.splitlines()
        # Each deletion shifts the list, so these remove the lines that were
        # originally at indices 1, 3 and 5.
        del lines[1]
        del lines[2]
        del lines[3]
        content = '\n'.join(lines)

    # Split after every '\n' and keep the terminator, exactly what
    # `file.readlines()` would return for this text.
    lines = io.StringIO(content).readlines()
    line_header = lines.pop(0)
    # Assumes the first line is a '# Title' heading: the first two characters
    # are dropped, and the last one (the newline) for the link text.
    title = line_header[2:-1]
    created = _iso_timestamp(lines[0], 'Created: ')
    modified = _iso_timestamp(lines[1], 'Modified: ')
    rest = lines[2:]

    # `parts` splits on the platform's separators. The first component (drive
    # or root) and the file name itself are left out of the breadcrumb.
    path_splits = Path(path).parts
    breadcrumb = [f'[[{part.replace("-", " ")}]]' for part in path_splits[1:-1]]
    breadcrumb.append(f'[[{title}]]')
    source = ' > '.join(breadcrumb)

    new_lines = [
        '---\n',
        f'created: {created}\n',
        f'modified: {modified}\n',
        f'migration: {datetime.now().isoformat()}\n',
        'tags: onenote, migration\n',
        'project: OneNote to Obsidian\n',
        f'alias: {line_header[2:]}',
        # The line that followed "Modified:" stays right after the alias
        # (presumably the export's own '---' separator -- TODO confirm).
        *rest[:1],
        '\n',
        f'Source:: {source}',
        '\n',
        f'Parent:: [[{path_splits[-2]}]]',
        '\n',
        f'Current:: [[{title}]]',
        '\n' * 2,
        line_header,
        '\n' * 2,
        *rest[1:],
    ]

    with open(path, 'w', encoding='UTF-8') as file:
        file.writelines(new_lines)
def output_csv_md(header, output_filename, data):
    """Save a change log as CSV and as a Markdown table inside `directory`.

    Args:
        header: Column names, written as the first CSV row.
        output_filename: File name without extension; `.csv` and `.md` are
            appended.
        data: Iterable of rows, each with one value per column.
    """
    csv_path = os.path.join(directory, output_filename + '.csv')
    md_path = os.path.join(directory, output_filename + '.md')

    with open(csv_path, mode='w', newline='', encoding='UTF-8') as csv_file:
        log_writer = csv.writer(csv_file, delimiter=',')
        log_writer.writerow(header)
        log_writer.writerows(data)

    # The Markdown table is rendered from the CSV that was just written.
    table = pd.read_csv(csv_path, encoding='UTF-8')
    with open(md_path, 'w', encoding='UTF-8') as md_file:
        table.to_markdown(buf=md_file)
# Reformat every `.md` note under `directory` and shorten over-long file names.
counter = 0
data = []
for root, dirs, files in os.walk(directory):
    # Format `.md`
    for file in files:
        if not file.endswith('.md'):
            continue
        file_path = os.path.join(root, file)
        print(file_path)
        md_formatting(file_path)
        # Rename file: only when the name (without extension) is too long.
        name, extension = os.path.splitext(file)
        if len(name) <= name_length:
            continue
        # The running counter keeps truncated names unique.
        new_filename = os.path.join(root, f'{name[:name_length]}-{counter}{extension}')
        os.rename(file_path, new_filename)
        data.append([file_path, new_filename, counter])
        counter += 1
output_csv_md(['file_path', 'new_filename', 'counter'], 'md_formatting', data)
# Rename folder `media` to `Attachments`
data = []
for root, dirs, files in os.walk(directory):
    # A folder holds at most one entry named exactly 'media', so a membership
    # test replaces the scan over `dirs`.
    if 'media' in dirs:
        old_path = os.path.join(root, 'media')
        new_path = os.path.join(root, 'Attachments')
        os.rename(old_path, new_path)
        # Keep the top-down walk in sync with the rename. Otherwise os.walk
        # would try the stale `media` path, silently fail, and never visit
        # the renamed folder.
        dirs[dirs.index('media')] = 'Attachments'
        data.append([old_path, new_path])
output_csv_md(['old_path', 'new_path'], 'rename_media', data)
# Trim folder names
data = []
counter = 0


def rename_folders(path):
    """Recursively shorten sub-folder names of `path` longer than `name_length`.

    A folder whose name is too long (and is not `media` or `Attachments`) is
    renamed to its first `name_length` characters plus `-<counter>`. Each
    rename is appended to the module-level `data` log and bumps the
    module-level `counter`. Sub-folders are processed depth-first, using the
    new location when a folder was renamed.

    Args:
        path: `pathlib.Path` of the folder whose children are inspected.
    """
    global data, counter
    for entry in path.iterdir():
        if not entry.is_dir():
            continue
        # Descend into the folder where it now lives.
        target = entry
        if len(entry.name) > name_length and entry.name not in ['media', 'Attachments']:
            target = entry.parent / (entry.name[:name_length] + "-" + str(counter))
            entry.rename(target)
            data.append([str(entry), str(target), counter])
            counter += 1
        rename_folders(target)


rename_folders(Path(directory))
output_csv_md(['item', 'new_path', 'counter'], f'rename_trim_{str(name_length)}', data)
# Delete empty folders
data = []
# Bottom-up walk: children are visited before their parent, so a folder that
# becomes empty after its sub-folders are removed is caught as well.
for root, dirs, files in os.walk(directory, topdown=False):
    for dir_name in reversed(dirs):
        full_path = os.path.join(root, dir_name)
        if os.listdir(full_path):
            continue
        print('>> DELETE:', full_path)
        os.rmdir(full_path)
        data.append([full_path])
output_csv_md(['full_path'], 'delete', data)
| # %% |
Author
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Be careful, this version deletes empty folders.
Before running it, I recommend making a backup of your vault.
Also, this version creates a CSV file with a log of all the changes.