Skip to content

Instantly share code, notes, and snippets.

@sparfenyuk
Last active January 25, 2025 08:25
Show Gist options
  • Select an option

  • Save sparfenyuk/628390c8fed7be8081d7d445fd51a220 to your computer and use it in GitHub Desktop.

Select an option

Save sparfenyuk/628390c8fed7be8081d7d445fd51a220 to your computer and use it in GitHub Desktop.
Concatenates all the source files in the given Git repository (written by Claude)
#!/usr/bin/env python3
import os
import subprocess
import sys
from datetime import datetime
import fnmatch
from pathlib import Path
import argparse
def get_git_status(repo_path):
    """Return the committed and staged file paths of a git repository.

    Side effect: the process working directory is changed to ``repo_path``.
    The returned paths are relative to that directory, so code that uses them
    afterwards (``os.path.exists``, ``open``, further git calls) depends on
    the working directory staying there.

    Args:
        repo_path: Path to the root of the git working tree.

    Returns:
        A sorted list of unique paths that are present in ``HEAD`` and/or
        staged in the index. A repository without any commit yet contributes
        only its staged files.

    Raises:
        OSError: If ``repo_path`` cannot be entered or git cannot be started.
        subprocess.CalledProcessError: If a git listing command fails.
    """
    os.chdir(repo_path)

    def _list_paths(command):
        # With -z git emits NUL-terminated, *unquoted* paths. Without it,
        # names containing non-ASCII or special characters are C-quoted
        # (e.g. "\316\273.txt") and no longer match files on disk.
        raw = subprocess.check_output(command)
        return [os.fsdecode(entry) for entry in raw.split(b'\0') if entry]

    # A freshly initialised repository has no HEAD; `git ls-tree HEAD` would
    # fail there even though staged files can still be listed.
    head_exists = subprocess.run(
        ['git', 'rev-parse', '--verify', '--quiet', 'HEAD'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode == 0

    tracked_files = []
    if head_exists:
        tracked_files = _list_paths(
            ['git', 'ls-tree', '-r', '-z', '--name-only', 'HEAD']
        )
    staged_files = _list_paths(
        ['git', 'diff', '--cached', '--name-only', '-z']
    )

    # Sorted so that the result does not depend on set iteration order.
    return sorted(set(tracked_files + staged_files))
def get_file_edit_time(file_path):
    """Return the author timestamp of the last commit that touched a file.

    Git is run in the current working directory, so ``file_path`` must be
    resolvable from there.

    Args:
        file_path: Path of the file to look up in the git history.

    Returns:
        The Unix timestamp (seconds) of the most recent commit touching the
        file, or 0 when git reports no such commit or the git command fails.
    """
    try:
        timestamp = subprocess.check_output(
            # '--' ends the revision/option list so the argument is always
            # treated as a path, even if it is named like a ref (e.g. "HEAD")
            # or starts with a dash.
            ['git', 'log', '-1', '--format=%at', '--', file_path]
        ).decode('utf-8').strip()
    except subprocess.CalledProcessError:
        return 0
    return int(timestamp) if timestamp else 0
def should_exclude(file_path, exclude_patterns):
    """Decide whether a repository file should be left out of the output.

    A file is excluded when any built-in or caller-supplied glob pattern
    matches its path. Patterns are tested against the full path and against
    every trailing portion of it that starts at a directory boundary, so
    ``.gitignore`` or ``node_modules/*`` also match inside subdirectories
    (``pkg/.gitignore``, ``pkg/node_modules/x.js``), not only at the root.

    Args:
        file_path: Repository-relative path using ``/`` separators, as
            printed by git.
        exclude_patterns: Additional ``fnmatch``-style patterns; may be
            ``None`` or empty.

    Returns:
        True if the file matches at least one pattern, otherwise False.
    """
    default_patterns = (
        '*.lock*',
        '*.pyc',
        '__pycache__/*',
        '.git/*',
        '.gitignore',
        '*.dll',
        '*.exe',
        '*.so',
        '*.dylib',
        'node_modules/*',
        '.env',
        '.DS_Store',
        'Thumbs.db',
    )
    all_patterns = [*default_patterns, *(exclude_patterns or ())]

    # "a/b/c.txt" -> ["a/b/c.txt", "b/c.txt", "c.txt"]; the first entry is
    # the full path, so everything that matched before still matches.
    components = file_path.split('/')
    candidates = [
        '/'.join(components[start:]) for start in range(len(components))
    ]

    return any(
        fnmatch.fnmatch(candidate, pattern)
        for pattern in all_patterns
        for candidate in candidates
    )
def process_repository(repo_path, output_file, dry_run=False, exclude_patterns=None):
    """Concatenate a repository's committed and staged files into one file.

    Files are ordered by the time of the last commit touching them, most
    recent first. Each file is written as a ``>>> <path>`` header line, the
    file content, and a trailing newline. Files that cannot be read as UTF-8
    text are skipped with a warning.

    Args:
        repo_path: Path to the root of the git working tree.
        output_file: Path of the file to write. A relative path is resolved
            against the working directory at the time of the call.
        dry_run: When true, only print the files that would be processed,
            with their last edit time, and write nothing.
        exclude_patterns: Extra glob patterns passed to ``should_exclude``.

    Raises:
        SystemExit: With status 1 if ``repo_path`` does not exist or does not
            contain a ``.git`` entry.
    """
    if not os.path.exists(repo_path):
        print(f"Error: Repository path '{repo_path}' does not exist.")
        sys.exit(1)
    if not os.path.exists(os.path.join(repo_path, '.git')):
        print(f"Error: '{repo_path}' is not a git repository.")
        sys.exit(1)

    # Resolve the output location now: get_git_status() changes the working
    # directory to the repository, which would otherwise silently redirect a
    # relative output path into the repository itself.
    output_file = os.path.abspath(output_file)
    exclude_patterns = exclude_patterns or []

    # From here on the working directory is repo_path, so the relative paths
    # returned by git can be used directly.
    files = get_git_status(repo_path)

    file_times = []
    for file_path in files:
        if should_exclude(file_path, exclude_patterns):
            continue
        # Listed by git but absent from the working tree (e.g. deleted).
        if not os.path.exists(file_path):
            continue
        # Never feed the output file into itself; it is truncated on open.
        if os.path.abspath(file_path) == output_file:
            continue
        file_times.append((file_path, get_file_edit_time(file_path)))

    # Most recently edited first.
    file_times.sort(key=lambda item: item[1], reverse=True)

    if dry_run:
        print("Files to be processed:")
        for file_path, edit_time in file_times:
            dt = datetime.fromtimestamp(edit_time)
            print(f"{file_path} (last edited: {dt})")
        return

    with open(output_file, 'w', encoding='utf-8') as out:
        for file_path, _ in file_times:
            # Only reading is best-effort (binary or unreadable files are
            # skipped); failures writing the output are real errors and are
            # left to propagate to the caller.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f"Warning: Could not process file '{file_path}': {str(e)}")
                continue
            out.write(f">>> {file_path}\n")
            out.write(content)
            out.write("\n")
def main():
    """Command-line entry point: parse arguments and process the repository.

    Exits with status 1 and prints an error message if processing raises an
    exception; prints a success message after a non-dry run.
    """
    cli = argparse.ArgumentParser(description='Process git repository files')
    cli.add_argument('repo_path', help='Path to git repository')
    cli.add_argument('output_file', help='Output file path')
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Print files to be processed without creating output',
    )
    cli.add_argument(
        '--exclude-patterns',
        nargs='+',
        default=[],
        help='Additional glob patterns to exclude',
    )
    options = cli.parse_args()

    try:
        process_repository(
            repo_path=os.path.abspath(options.repo_path),
            output_file=options.output_file,
            dry_run=options.dry_run,
            exclude_patterns=options.exclude_patterns,
        )
        wrote_output = not options.dry_run
        if wrote_output:
            print(f"Successfully processed repository. Output written to '{options.output_file}'")
    except Exception as error:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"Error processing repository: {error}")
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment