Skip to content

Instantly share code, notes, and snippets.

@koteitan
Created October 24, 2025 19:17
Show Gist options
  • Select an option

  • Save koteitan/4803e0e493c2f506ebc4f14a1eae1d1e to your computer and use it in GitHub Desktop.

Select an option

Save koteitan/4803e0e493c2f506ebc4f14a1eae1d1e to your computer and use it in GitHub Desktop.
Dump all text files of a repository into a single text file for use as LLM input
import os
import sys
def is_binary(file_path):
    """Heuristically decide whether a file holds binary (non-text) data.

    Only the first 1024 bytes are inspected. The file is reported as binary
    when that sample contains a NUL byte, or when more than 30% of its bytes
    fall outside the set treated as text (a handful of control characters
    plus every byte from 0x20 to 0xFF).

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the file looks binary or cannot be opened/read; False if it
        looks like text. An empty file is considered text.
    """
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(1024)
    except OSError:
        # Files that cannot be read are treated as binary.
        return True

    if not chunk:
        # Nothing in an empty file is binary. Without this guard the ratio
        # below would divide by zero.
        return False
    if b'\0' in chunk:
        return True

    text_characters = bytes({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)))
    # translate(None, delete) removes every text byte, leaving only the
    # bytes that are not considered text.
    nontext = chunk.translate(None, text_characters)
    return len(nontext) / len(chunk) > 0.30
def export_codebase(root_dir, output_file):
    """Concatenate every text file under a directory tree into one file.

    Walks ``root_dir`` recursively, skipping ``.git`` directories and every
    file that ``is_binary`` reports as binary. Each remaining file is
    appended to ``output_file`` as a ``--- FILE: <relative path> ---`` header
    followed by its contents, decoded as UTF-8 with undecodable bytes
    dropped.

    Args:
        root_dir: Directory whose files are exported.
        output_file: Path of the file that is (over)written with the dump.
    """
    # Resolve once so the dump itself can be recognised if the walk reaches it.
    output_real = os.path.normcase(os.path.realpath(output_file))

    with open(output_file, 'w', encoding='utf-8') as out:
        for dirpath, dirnames, filenames in os.walk(root_dir):
            # Prune in place so os.walk never descends into .git directories.
            dirnames[:] = [d for d in dirnames if d != '.git']
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                if os.path.normcase(os.path.realpath(file_path)) == output_real:
                    # The output file may live inside root_dir; reading it
                    # while it is being written would copy the dump into
                    # itself.
                    continue
                if is_binary(file_path):
                    continue

                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        code = f.read()
                except OSError as e:
                    # Best effort: report the unreadable file and move on.
                    print(f"Error reading {file_path}: {e}")
                    continue

                # Writes stay outside the try so that a failure to write the
                # dump is not misreported as a read error.
                rel_path = os.path.relpath(file_path, root_dir)
                out.write(f"\n--- FILE: {rel_path} ---\n")
                out.write(code)
                out.write("\n")
# Script entry point: requires exactly two positional arguments, the
# directory to export and the path of the file to write.
if __name__ == "__main__":
    if len(sys.argv[1:]) != 2:
        print(f"Usage: {sys.argv[0]} <source_directory> <output_file>")
        sys.exit(1)
    source_dir, output_file = sys.argv[1:]
    export_codebase(source_dir, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment