Skip to content

Instantly share code, notes, and snippets.

@nsaphra
Created February 17, 2015 17:29
Show Gist options
  • Select an option

  • Save nsaphra/aefd783da23a582b3c6a to your computer and use it in GitHub Desktop.

Select an option

Save nsaphra/aefd783da23a582b3c6a to your computer and use it in GitHub Desktop.
Concatenate all the files in a directory, recursively, and print their contents.
#!/usr/bin/python
from collections import defaultdict
import json
import os
import argparse
import gzip
import sys
import codecs
from time import asctime
# Module-wide verbosity switch read by verblog(). It defaults to on, and the
# script entry point overwrites it with the value of the --verbose flag.
verbose = True
def log(s):
    """Write *s* followed by a newline to standard error.

    Diagnostics go to stderr so they never mix with the concatenated file
    contents that the script emits on stdout.

    Args:
        s: The message to emit. Non-string values are converted with ``%s``
            formatting, mirroring what the ``print`` statement did.
    """
    # sys.stderr.write is used instead of the Python 2-only
    # ``print >> sys.stderr`` statement so the function is valid on both
    # Python 2 and Python 3. The one-element tuple keeps tuple arguments
    # from being unpacked by the % operator.
    sys.stderr.write("%s\n" % (s,))
def verblog(s):
    """Forward *s* to log() only when the module-level ``verbose`` flag is set."""
    if not verbose:
        return
    log(s)
def processfile(path, fname):
    """Print the lower-cased contents of one regular, non-hidden file.

    Entries whose name starts with ``.`` and entries that are not regular
    files (directories, missing paths, ...) are skipped silently.

    Args:
        path: Directory that contains the entry.
        fname: Name of the entry inside *path*.

    Returns:
        None. The file's text is written to standard output, followed by the
        newline that ``print`` appends.
    """
    fullpath = os.path.join(path, fname)
    if fname.startswith('.') or not os.path.isfile(fullpath):
        return
    # The context manager guarantees the handle is closed even if read()
    # raises, which the manual open()/close() pair did not.
    with open(fullpath, 'r') as f:
        # Note you can incorporate lower() into preproc instead.
        # print(...) with a single argument behaves identically on Python 2
        # (parenthesised expression) and Python 3 (function call).
        print(f.read().lower())
def traversedocs(rootdir):
    """Walk *rootdir* recursively and hand every file entry to processfile().

    Each directory visited is announced through verblog() with a timestamp
    before its files are processed.
    """
    for dirpath, _subdirs, filenames in os.walk(rootdir):
        verblog("%s - traversing %s" % (asctime(), dirpath))
        for name in filenames:
            processfile(dirpath, name)
if __name__ == '__main__':
    # Command-line entry point: parse the options, then walk the requested
    # directory tree and print every file's contents.
    parser = argparse.ArgumentParser(description='munge twitter')
    # Without a root directory there is nothing to walk; let argparse report
    # the omission instead of failing later with an AttributeError on None.
    parser.add_argument('--rootdir', required=True)
    parser.add_argument('--verbose', '-v', action='store_true')
    args = parser.parse_args()
    # Runs at module scope, so this rebinds the global flag verblog() reads.
    verbose = args.verbose
    # str.rstrip returns a new string, so its result has to be kept; the
    # original call discarded it. Fall back to the raw value when stripping
    # would leave nothing (e.g. the filesystem root '/').
    rootdir = args.rootdir.rstrip('/') or args.rootdir
    traversedocs(rootdir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment