Skip to content

Instantly share code, notes, and snippets.

@jesseh
Created November 29, 2012 13:39
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save jesseh/4169164 to your computer and use it in GitHub Desktop.
Save jesseh/4169164 to your computer and use it in GitHub Desktop.
Pareto Chart with Matplotlib
import pandas as pd
import matplotlib.pyplot as plt
def combined_label(perc, tot):
"""
Format a label to include by Euros and %.
"""
return "{0:,.0f}k EUR, {1:.0f}%".format(perc * tot / 1000, perc * 100)
def cost_cum(data, focus, subject):
"""
Accumulate the stats.
- data is a DataFrame,
- focus is the colum to group by
- subject is the column to aggregate.
"""
# Setup data frame
parts = data[[focus, 'cost']].groupby(focus).sum().sort(subject, ascending=False)
parts['percent'] = parts['cost'] / parts.cost.sum()
parts['cum_percent'] = parts['percent'].cumsum()
return parts
def cost_pareto(data, focus_name, limit_percent = 0.75):
# Filter and organize the data frame
top_parts = data[data['cum_percent'] < limit_percent]
top_parts.set_index(top_parts['percent'])
# Draw the plots
fig = plt.figure(figsize=(10,7))
fig.subplots_adjust(bottom=0.4, left=0.15)
ax = fig.add_subplot(1,1,1)
top_parts['cum_percent'].plot(ax=ax, color="k", drawstyle="steps-post")
top_parts['percent'].plot(ax=ax, kind="bar", color="k", alpha=0.5)
ax.set_ylim(bottom=0, top=1)
tick_nums = [x/float(100) for x in range(0,101,20)]
ax.set_yticks(tick_nums)
tot_cost = top_parts['cost'].sum()
ax.set_yticklabels([combined_label(x, tot_cost) for x in tick_nums])
ax.set_title("Top %s%% of Cost Split By %s" % (int(limit_percent * 100), focus))
ax.set_xlabel("")
return ax
accumulated = cost_cum(data, 'Part', 'Cost')
chart = cost_pareto(accumulated, 'Part', 0.9)
@tisimst
Copy link

tisimst commented Sep 5, 2013

Good work!

For what it's worth, here's my take at making a production-ready code to do this task. Take a look at https://github.com/tisimst/paretochart for the file and a nice example of its usage and customizability. Cheers!

Abraham (tisimst)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment