from xlwings import Workbook, Range import pandas as pd import os import re # Script to merge a folder containing Excel workbooks into a single workbook. # The folder should only contain Excel workbooks and must all either be in csv, xls or xlsx format # To run, open the command prompt and enter the command python Merge_Excel_Workbooks.py """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" function to load data into Excel without overloading memory """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" def chunk_df(df, sheet, startcell, chunk_size=1000): if len(df) <= (chunk_size + 1): Range(sheet, startcell, index=False, header=True).value = df else: Range(sheet, startcell, index=False).value = list(df.columns) c = re.match(r"([a-z]+)([0-9]+)", startcell[0] + str(int(startcell[1]) + 1), re.I) row = c.group(1) col = int(c.group(2)) for chunk in (df[rw:rw + chunk_size] for rw in range(0, len(df), chunk_size)): Range(sheet, row + str(col), index=False, header=False).value = chunk col += chunk_size """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" Script """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" # Prompt to enter the folder path. # Only Excel Workbooks should be included in the folder path and all should either be in csv, xls or xlsx format workbook_folder = raw_input('Enter folder path of workbooks to merge: ') workbook_folder = os.path.normpath(workbook_folder) passback_templates = os.listdir(workbook_folder) temp_to_merge = [] for i in passback_templates: temp_to_merge.append(workbook_folder + '/' + str(i)) merged_workbooks = pd.DataFrame() # 0 in the read_excel method refers to the index of the sheet. The first # sheet in the workbook has a 0 index. for sheet in temp_to_merge: tab = raw_input('Enter worksheet name to merge: ') if sheet[:-4] == '.csv': workbook = pd.read_csv(sheet, index_col=None, na_values=[0]) merged_workbooks = merged_workbooks.append(workbook) else: workbook = pd.read_excel(sheet, tab, index_col=None, na_values=[0]) merged_workbooks = merged_workbooks.append(workbook) merged_workbooks.fillna(0, inplace = True) # A new workbook will open and copy the merged data. wb = Workbook() chunk_df(merged_workbooks, 'Sheet1', 'A1')