Skip to content

Instantly share code, notes, and snippets.

@JohnCoogan
Last active August 29, 2015 14:26
Show Gist options
  • Save JohnCoogan/6c4ff9a04e13bf763990 to your computer and use it in GitHub Desktop.
Save JohnCoogan/6c4ff9a04e13bf763990 to your computer and use it in GitHub Desktop.
Soylent Customer Ranking Script
# The MIT License (MIT)
# Copyright (c) 2015 John Coogan
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
import csv
import time
from collections import defaultdict
def dollars_to_seconds(dollars):
    """Translate a purchase amount into seconds of active time.

    The amount is raised to the power 0.75 to obtain a number of days, so
    each additional dollar buys less time than the one before it. The day
    count is then scaled to seconds and truncated to an int.
    """
    day_in_seconds = 24 * 60 * 60
    days_earned = dollars ** 0.75
    return int(day_in_seconds * days_earned)
def get_active_users(all_transactions):
    """Rank users by the moment their purchased active time runs out.

    Each transaction is an ``(email, timestamp, amount)`` triple; the
    timestamp and amount may be strings (as read from a CSV) or numbers.
    Transactions are replayed in chronological order. A purchase made while
    the user is still active extends the current expiry; otherwise the
    expiry restarts from the purchase timestamp.

    Prints the total number of users and how many have an expiry later
    than the current time.

    Returns a list of ``(email, expiry_timestamp)`` tuples sorted by expiry,
    latest first.

    Raises ValueError if a row does not have exactly three fields or if a
    timestamp/amount cannot be parsed as a number.
    """
    # Parse before sorting: CSV fields arrive as strings, and sorting the raw
    # strings would order timestamps lexicographically ("1000" < "999").
    parsed = [(email, int(timestamp), float(amount))
              for email, timestamp, amount in all_transactions]
    parsed.sort(key=lambda row: row[1])

    expiry_by_email = {}
    for email, timestamp, amount in parsed:
        additional_seconds = dollars_to_seconds(amount)
        current_expiry = expiry_by_email.get(email)
        if current_expiry is not None and current_expiry > timestamp:
            # user was active at the time of this purchase.
            expiry_by_email[email] = current_expiry + additional_seconds
        else:
            # user is inactive and needs to either activate or reactivate.
            expiry_by_email[email] = timestamp + additional_seconds

    sorted_results = sorted(expiry_by_email.items(), key=lambda x: x[1],
                            reverse=True)
    current_timestamp = int(time.time())
    active_count = sum(1 for _, expiry in sorted_results
                       if expiry > current_timestamp)
    print("Total users: {}".format(len(sorted_results)))
    print("Total actives: {}".format(active_count))
    return sorted_results
def get_score(timestamp, amount, gravity=1.8):
    """Return a time-decayed score for a single transaction.

    The score is ``(amount - 1) / (age_in_hours + 2) ** gravity``, where the
    age is measured from ``timestamp`` (seconds since the epoch) to the
    current time. ``timestamp`` and ``amount`` may be given as strings; they
    are converted with ``int`` and ``float`` respectively.
    """
    age_in_hours = (time.time() - int(timestamp)) / 3600
    numerator = float(amount) - 1
    return numerator / (age_in_hours + 2) ** gravity
def get_agg_score(trans):
    """Return the total of the per-transaction scores for ``trans``.

    ``trans`` is an iterable of ``(email, timestamp, amount)`` triples; the
    email is ignored and each remaining pair is scored with ``get_score``.
    """
    per_purchase = (get_score(when, value) for _email, when, value in trans)
    return sum(per_purchase)
def get_user_scores(all_transactions):
    """Score every user and return the ranking, highest score first.

    Transactions are grouped by their first field (the email), each group is
    reduced to one number with ``get_agg_score``, and the resulting
    ``(email, score)`` pairs are returned sorted by score in descending
    order.
    """
    rows_by_email = {}
    for row in all_transactions:
        rows_by_email.setdefault(row[0], []).append(row)

    ranking = [(email, get_agg_score(rows))
               for email, rows in rows_by_email.items()]
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
if __name__ == '__main__':
    # Command-line entry point: read (email, timestamp, amount) rows from
    # --infile, rank the users with the chosen method, and write the
    # (email, score) rows to --outfile.
    parser = argparse.ArgumentParser(description='Calculate active users.')
    parser.add_argument('--infile', type=argparse.FileType('r'),
                        help='filename of csv with user data', required=True)
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        help='filename of output file', required=True)
    # Restrict to the two supported methods so a typo is reported instead of
    # silently falling back to the soylent ranking.
    parser.add_argument('--method', type=str, help='pick either hn or soylent',
                        choices=('hn', 'soylent'), default='soylent')
    args = parser.parse_args()

    # argparse.FileType hands back already-open handles; the with-block makes
    # sure both are flushed and closed even if ranking raises.
    with args.infile as infile, args.outfile as outfile:
        # csv.reader yields an empty list for a blank line; drop those rows so
        # they do not break the three-field unpacking in the ranking functions.
        all_transactions = [row for row in csv.reader(infile) if row]
        if args.method == 'hn':
            sorted_results = get_user_scores(all_transactions)
        else:
            sorted_results = get_active_users(all_transactions)
        csv.writer(outfile).writerows(sorted_results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment