Created
October 24, 2025 19:17
-
-
Save koteitan/4803e0e493c2f506ebc4f14a1eae1d1e to your computer and use it in GitHub Desktop.
Dump every text file in a repository directory (skipping .git) into a single text file for use as LLM input
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import sys | |
# Byte values regarded as "text": a handful of control characters (BEL, BS,
# TAB, LF, FF, CR, ESC) plus everything from 0x20 through 0xFF.  Built once
# at import time because it never changes between calls.
_TEXT_BYTES = bytes([7, 8, 9, 10, 12, 13, 27]) + bytes(range(0x20, 0x100))


def is_binary(file_path):
    """Heuristically decide whether the file at *file_path* is binary.

    Only the first 1024 bytes are inspected.  The file is reported as binary
    when that sample contains a NUL byte, or when more than 30% of its bytes
    fall outside the set of bytes regarded as text.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the file looks binary or cannot be opened/read, False if it
        looks like text.  An empty file is reported as text (False).
    """
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(1024)
    except (OSError, ValueError):
        # Files that cannot be read are treated as binary.
        return True

    if not chunk:
        # An empty file has nothing non-textual in it.  Without this guard
        # the ratio below would divide by zero.
        return False
    if b'\0' in chunk:
        return True

    # Delete every text byte; whatever remains is non-text.
    nontext = chunk.translate(None, _TEXT_BYTES)
    return len(nontext) / len(chunk) > 0.30
def export_codebase(root_dir, output_file):
    """Concatenate every text file under *root_dir* into *output_file*.

    The directory tree is walked with ``os.walk``; directories named
    ``.git`` are not descended into.  Each file that ``is_binary`` does not
    flag as binary is written to the output, preceded by a
    ``--- FILE: <relative path> ---`` header line.  Files are decoded as
    UTF-8 and undecodable bytes are dropped.

    Args:
        root_dir: Directory whose contents are exported.
        output_file: Path of the text file to create (overwritten if it
            already exists).  It may be located inside *root_dir*; it is
            then skipped during the walk so it is never copied into itself.

    A file that cannot be read is reported on standard output and skipped.
    """
    # Canonical form of the output path, used to recognise it during the walk.
    output_real = os.path.normcase(os.path.realpath(output_file))

    with open(output_file, 'w', encoding='utf-8') as out:
        for dirpath, dirnames, filenames in os.walk(root_dir):
            # Prune .git directories in place so os.walk does not enter them.
            dirnames[:] = [d for d in dirnames if d != '.git']

            for filename in filenames:
                file_path = os.path.join(dirpath, filename)

                # Never export the file we are currently writing: it would
                # be read half-written and appended to itself.
                if os.path.normcase(os.path.realpath(file_path)) == output_real:
                    continue
                if is_binary(file_path):
                    continue

                rel_path = os.path.relpath(file_path, root_dir)
                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        code = f.read()
                except OSError as e:
                    print(f"Error reading {file_path}: {e}")
                    continue

                out.write(f"\n--- FILE: {rel_path} ---\n")
                out.write(code)
                out.write("\n")
if __name__ == "__main__":
    # Command-line entry point: exactly two arguments are required, the
    # directory to export and the path of the file to write.
    if len(sys.argv) == 3:
        _, source_dir, output_file = sys.argv
        export_codebase(source_dir, output_file)
    else:
        print(f"Usage: {sys.argv[0]} <source_directory> <output_file>")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment