Skip to content

Instantly share code, notes, and snippets.

@richiverse
Last active August 29, 2015 14:09
Show Gist options
  • Save richiverse/0901c664843c14299510 to your computer and use it in GitHub Desktop.
Save richiverse/0901c664843c14299510 to your computer and use it in GitHub Desktop.
Takes a path of xls(x) files and convert them to 1 pandas dataframe.
#!/usr/bin/env python
from __future__ import print_function
from glob import glob
import pandas as pd
# path to your xls(x) files
xlsx_path = "<your/path>/*.xls*"
# set to False in production
DEBUG = True
# ipython magic - uncomment in ipython or ipython notebook instead of glob()
# xlsx = !ls $xlsx_path
# glob works outside of ipython
xlsx = glob(xlsx_path)
def xlspath_todf(xlspath, verbose=DEBUG):
"""Takes a path to xls(x) files and converts them to 1 dataframe."""
dfs = []
if verbose:
print(len(xlsx),'file(s)')
for fil in xlsx:
xl = pd.ExcelFile(fil)
if verbose:
print()
print(' ' * 4,fil)
print(' ' * 4, len(xl.sheet_names), 'sheet(s)')
for sheet in xl.sheet_names:
if verbose:
print(' ' * 8, sheet)
df = pd.read_excel(fil, sheet)
# add excel file name and sheet for easy grouping
df['xl_name'] = fil
df['xl_sheet'] = sheet
dfs.append(df)
return pd.concat(dfs)
df = xlspath_todf(xlsx)
# show first 5 records
df.head()
# show last 5 records
df.tail()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment