Created
March 10, 2015 09:35
-
-
Save wonga00/c71ed72421efd9a3250b to your computer and use it in GitHub Desktop.
Time Series helpers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from datetime import datetime, timedelta | |
from pm_base.util.tz import convert_to_utc, convert_to_localtime | |
from numpy import ones | |
from matplotlib.pyplot import xlim, title | |
def to_ts(values, tz=None):
    """Build a pandas time series from a list of times or (value, time) pairs.

    values: iterable of datetimes, or iterable of (v, t) pairs where t is a
        datetime. It is materialized with list() and only the first element
        is inspected to decide which of the two shapes was passed.
    tz: optional timezone name. When supplied, the index is localized as UTC
        and then converted to tz.

    Returns a pandas Series indexed by the times. If the input is just a
    list of times, every value of the series is 1. An empty input yields an
    empty float series with an empty DatetimeIndex.

    Raises ValueError('unexpected input') when the first element is neither
    a datetime nor a 2-item pair whose second item is a datetime.
    """
    values = list(values)
    if not values:
        # Nothing to inspect; give back an empty series that can still be
        # tz-localized below instead of failing on values[0].
        ts = pd.Series([], index=pd.DatetimeIndex([]), dtype=float)
    else:
        first = values[0]
        # isinstance (not type() ==) so datetime subclasses such as
        # pd.Timestamp are accepted as well.
        if isinstance(first, datetime):
            ts = pd.Series(ones(len(values)), index=values)
        else:
            try:
                is_pair = len(first) == 2 and isinstance(first[1], datetime)
            except TypeError:
                # first has no len() / is not indexable: not a (v, t) pair.
                is_pair = False
            if not is_pair:
                raise ValueError('unexpected input')
            data, times = zip(*values)
            ts = pd.Series(list(data), index=list(times))
    if tz:
        return ts.tz_localize('UTC').tz_convert(tz)
    return ts
def percent(val):
    """Return an aggregator measuring how often `val` occurs in a bucket.

    The returned function takes a sequence and gives the fraction (between
    0 and 1, not scaled to 100) of its items that compare equal to `val`.
    An empty sequence gives 0. Intended for use as the `how` argument of
    ts.resample(period, how).

    ex. order_state_ts.resample('1h', how=percent('assigned_to_courier'))
    """
    def f(seq):
        matches = sum(1 for item in seq if item == val)
        total = len(seq)
        if not total:
            return 0
        return matches / float(total)
    return f
def lag_plot(start, end, tz, lag, tsfun, period='10min', how='count'):
    """Plot a time series together with two lagged copies of itself.

    often we want to do lag comparisons of timeseries, ex compare today's job
    request count to last week

    start: start of the window; passed through convert_to_utc(start, tz)
        NOTE(review): the original doc called this a "utc datetime" even
        though it is converted with convert_to_utc -- confirm which is meant
    end: end of the window; treated the same way as start
    tz: timezone string
    lag: timedelta (2 lags will be generated: 1 * lag and 2 * lag back)
    tsfun: a function that will return a time-series from a date range; it
        is called with a two-item list [range_start, range_end] and its
        result is localized as UTC here. Its __name__ is used as the title
    period: resample period
    how: resample method

    Draws three lines on the current matplotlib axes (the unlagged series
    and the two lagged ones, progressively fainter), each shifted forward
    by its lag so the curves overlap the same x-range.
    """
    date_range = [
        convert_to_utc(start, tz),
        convert_to_utc(end, tz)
    ]
    linestyles = ['-', '-', '--']
    alphas = [1.0, 0.5, 0.25]
    # `range` instead of the Python-2-only `xrange`; works on both 2 and 3.
    for x, (alpha, linestyle) in enumerate(zip(alphas, linestyles)):
        # Fetch the window moved x lags into the past ...
        ts = tsfun([d - x * lag for d in date_range])
        # ... then shift it forward by the same amount so it lines up with
        # the unlagged window on the plot.
        # NOTE(review): `tshift` and `resample(..., how=...)` were removed in
        # later pandas releases; confirm the pinned pandas version before
        # upgrading.
        (ts.tz_localize('UTC')
           .tz_convert(tz)
           .tshift(freq=x * lag)
           .resample(period, how=how)
           .fillna(0)
           .plot(color='b', alpha=alpha, linestyle=linestyle))
    xlim(date_range)
    title(tsfun.__name__)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment