Skip to content

Instantly share code, notes, and snippets.

@wonga00
Created March 10, 2015 09:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wonga00/c71ed72421efd9a3250b to your computer and use it in GitHub Desktop.
Save wonga00/c71ed72421efd9a3250b to your computer and use it in GitHub Desktop.
Time Series helpers
import pandas as pd
from datetime import datetime, timedelta
from pm_base.util.tz import convert_to_utc, convert_to_localtime
from numpy import ones
from matplotlib.pyplot import xlim, title
def to_ts(values, tz=None):
    """Build a pandas time series from a list of times or (value, time) pairs.

    values: iterable of datetimes, or of 2-item (value, time) pairs whose
        second item is a datetime
    tz: optional timezone; when supplied, the index is localized as UTC and
        then converted to tz

    Returns a pandas Series indexed by the given times. If the input is just
    a list of times, every value in the series is 1. An empty input yields an
    empty float Series with a DatetimeIndex.

    Raises ValueError('unexpected input') when the first element is neither
    a datetime nor a (value, datetime) pair.
    """
    values = list(values)
    if not values:
        # Nothing to inspect; give back an empty series that still supports
        # the tz_localize/tz_convert calls below.
        ts = pd.Series([], index=pd.DatetimeIndex([]), dtype=float)
    else:
        first = values[0]
        # isinstance (not type equality) so that datetime subclasses such as
        # pandas.Timestamp are recognised as times.
        first_is_time = isinstance(first, datetime)
        try:
            first_is_pair = (not first_is_time
                             and len(first) == 2
                             and isinstance(first[1], datetime))
        except (TypeError, IndexError, KeyError):
            # Unsized or non-indexable element: not a pair.
            first_is_pair = False

        if first_is_time:
            ts = pd.Series(ones(len(values)), index=values)
        elif first_is_pair:
            data, index = zip(*values)
            ts = pd.Series(list(data), index=list(index))
        else:
            raise ValueError('unexpected input')

    if tz:
        return ts.tz_localize('UTC').tz_convert(tz)
    return ts
def percent(val):
    """Return a function measuring how often val occurs within a bucket.

    The returned function takes a sequence and gives the fraction (a float
    between 0.0 and 1.0, not multiplied by 100) of its items that compare
    equal to val. An empty sequence gives 0.0.

    Intended for use as the aggregation function in
    ts.resample(period, how), e.g.
        order_state_ts.resample('1h', how=percent('assigned_to_courier'))
    """
    def f(seq):
        total = len(seq)
        if not total:
            # Avoid dividing by zero for empty buckets.
            return 0.0
        matches = sum(1 for v in seq if v == val)
        # float() keeps true division under Python 2 as well.
        return matches / float(total)
    return f
def lag_plot(start, end, tz, lag, tsfun, period='10min', how='count'):
    """Plot a time series together with two lagged copies of itself.

    Often we want to do lag comparisons of timeseries, e.g. compare today's
    job request count to last week's.

    start: datetime marking the beginning of the window
    end: datetime marking the end of the window
    tz: timezone string
    lag: timedelta (2 lags will be generated: 1 * lag and 2 * lag)
    tsfun: a function that takes a [start, end] list of datetimes and
        returns a time series for that range; its __name__ is used as the
        plot title
    period: resample period
    how: resample method

    NOTE(review): the original docstring described start/end as UTC
    datetimes, yet both are passed through convert_to_utc(..., tz) before
    use -- confirm which is intended.
    """
    date_range = [
        convert_to_utc(start, tz),
        convert_to_utc(end, tz)
    ]
    # One entry per plotted series: current window, 1 lag back, 2 lags back.
    linestyles = ['-', '-', '--']
    alphas = [1.0, 0.5, 0.25]
    # range instead of xrange: identical here and also valid on Python 3.
    for x in range(3):
        # Fetch the window moved x lags into the past ...
        ts = tsfun([d - x * lag for d in date_range])
        # ... then shift it forward by the same amount so all three series
        # overlay on the same time axis.
        # NOTE(review): tshift() and resample(how=...) are deprecated or
        # removed in newer pandas releases -- confirm the pandas version
        # this runs against before upgrading.
        (ts.tz_localize('UTC')
            .tz_convert(tz)
            .tshift(freq=x * lag)
            .resample(period, how=how)
            .fillna(0)
            .plot(color='b', alpha=alphas[x], linestyle=linestyles[x]))
    xlim(date_range)
    title(tsfun.__name__)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment