Dane Macaulay (danemacaulay)
from functools import wraps
import time
import inspect

def timing(f):
    @wraps(f)
    def wrapper(*args, **kwargs):
        module = inspect.getmodule(f).__name__
        start = time.time()
        result = f(*args, **kwargs)
        # The gist preview cuts off here; a likely continuation reports
        # the elapsed time and passes the result through:
        print('{}.{} took {:.3f}s'.format(module, f.__name__, time.time() - start))
        return result
    return wrapper
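
A quick usage sketch, assuming the completion above: decorating any function prints where it lives and how long it ran.

@timing
def slow_add(a, b):
    time.sleep(0.1)
    return a + b

slow_add(1, 2)  # prints e.g. "__main__.slow_add took 0.100s"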
danemacaulay / proxy.js
Created December 17, 2018 03:48
node proxy
const port = process.argv[2] || 7979
const targetPort = process.argv[3] || 5000
const path = process.argv[4] || 'client'
const express = require('express')
const proxy = require('http-proxy-middleware')
const app = express()
app.use(express.static(path))
app.use('/services', proxy({target: `http://0.0.0.0:${targetPort}`, changeOrigin: true}))
app.listen(port, () => console.log(`Listening at http://localhost:${port}`))
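
The positional arguments map to the listen port, the backend port, and the static directory, so node proxy.js 7979 5000 client serves ./client on port 7979 and forwards /services requests to the service on port 5000.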
function getAccumulatedList(list) {
  // Single pass: thread the running total and the output list through
  // one accumulator object, e.g. [1, 2, 3] -> [1, 3, 6]
  return list.reduce((accumulator, item) => {
    accumulator.count += item
    accumulator.acclist.push(accumulator.count)
    return accumulator
  }, {count: 0, acclist: []}).acclist
}
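
For example, getAccumulatedList([1, 2, 3]) returns [1, 3, 6]; carrying both the count and the list in one accumulator means the input is only walked once.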
danemacaulay / scheduler.py
Last active November 30, 2018 22:03
python background scheduler
import time
import threading
import datetime

class Scheduler(object):
    def __init__(self, hours, job):
        # total_seconds(), not .seconds: .seconds discards whole days,
        # so a 24+ hour interval would silently wrap to near zero
        self.interval = int(datetime.timedelta(hours=hours).total_seconds())
        self.job = job
        thread = threading.Thread(target=self.run, args=())
        thread.daemon = True  # don't block process exit
        thread.start()

    def run(self):  # likely continuation; the preview cuts off above
        while True:
            self.job()
            time.sleep(self.interval)
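
A minimal usage sketch, assuming the completed class above (the job itself is made up):

def heartbeat():
    print('still alive at', datetime.datetime.now())

Scheduler(hours=1, job=heartbeat)
# the worker is a daemon thread, so keep the main thread busy or it exits immediately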
import ast
from Crypto.Cipher import PKCS1_OAEP
from Crypto.PublicKey import RSA

with open('cert/referral', 'rb') as f:
    key_text = f.read()
privkey = RSA.importKey(key_text)
publickey = privkey.publickey()
encryptor = PKCS1_OAEP.new(publickey)
decryptor = PKCS1_OAEP.new(privkey)

# The preview ends at the def; given the ast import, a plausible pair of
# bodies stringifies the ciphertext bytes and reverses it with literal_eval:
def encrypt(msg):
    return str(encryptor.encrypt(msg))

def decrypt(msg_text):
    return decryptor.decrypt(ast.literal_eval(msg_text))
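
A round trip under those assumed bodies (Python 3):

token = encrypt(b'hello')          # ciphertext rendered as a bytes literal
assert decrypt(token) == b'hello'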
danemacaulay / indexer.py
Last active January 4, 2018 19:11
Stream through remote common crawl index file to search for WARC entries by URL
import sys
import requests
import zlib
import json
from urllib.parse import urlparse
from collections import Counter
path = sys.argv[1]
url = 'https://commoncrawl.s3.amazonaws.com/{}'.format(path)
google_netloc = 'www.google.com'
google_path = '/maps/place'
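
The preview stops after the setup; below is a sketch of a plausible streaming loop (Python 3.6+, helper name is mine, and it assumes the standard CDX shard layout of a key, a timestamp, and a trailing JSON blob per line).

def iter_index_lines(index_url):
    # Stream the gzipped index without holding it all in memory
    # (handles a single gzip member; the 32 flag tolerates the gzip header).
    decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
    pending = b''
    resp = requests.get(index_url, stream=True)
    for chunk in resp.iter_content(chunk_size=2 ** 16):
        pending += decompressor.decompress(chunk)
        *lines, pending = pending.split(b'\n')
        for line in lines:
            yield line

counts = Counter()
for line in iter_index_lines(url):
    record = json.loads(line[line.index(b'{'):])  # trailing JSON blob
    parsed = urlparse(record['url'])
    if parsed.netloc == google_netloc and parsed.path.startswith(google_path):
        counts[parsed.path] += 1
        print(record)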
danemacaulay / warc_fetcher.py
Created January 4, 2018 15:19
Direct to STDOUT all warc data on a particular domain using index.commoncrawl.org
import gzip
import json
import requests
from StringIO import StringIO

def get_page_count(searchString):
    url = 'http://index.commoncrawl.org/CC-MAIN-2017-51-index?url={}&output=json&showNumPages=true'.format(searchString)
    resp = requests.get(url)
    return json.loads(resp.content)['pages']
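
The rest of the gist isn't shown; a plausible driver in the same Python 2 style as the StringIO import would page through the index (the filename/offset/length fields come from the index.commoncrawl.org JSON output) and write each record's byte range to STDOUT:

import sys

def dump_warc_records(searchString):
    base = 'http://index.commoncrawl.org/CC-MAIN-2017-51-index'
    for page in range(get_page_count(searchString)):
        resp = requests.get(base, params={'url': searchString, 'output': 'json', 'page': page})
        for line in resp.content.splitlines():
            entry = json.loads(line)
            start = int(entry['offset'])
            end = start + int(entry['length']) - 1
            warc = requests.get(
                'https://commoncrawl.s3.amazonaws.com/' + entry['filename'],
                headers={'Range': 'bytes={}-{}'.format(start, end)})
            # each range is a standalone gzip member holding one WARC record
            sys.stdout.write(gzip.GzipFile(fileobj=StringIO(warc.content)).read())

dump_warc_records('example.com/*')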
import java.util.HashSet;
import java.util.Set;
import com.google.i18n.phonenumbers.PhoneNumberMatch;
import com.google.i18n.phonenumbers.PhoneNumberUtil;

// Collect the raw text of every phone number found in `source`
Set<String> phones = new HashSet<>();
PhoneNumberUtil util = PhoneNumberUtil.getInstance();
for (PhoneNumberMatch match : util.findNumbers(source, null)) {
    phones.add(match.rawString());
}
danemacaulay / show_informative_text_features.py
Created November 7, 2017 15:45
show important features of a text classifier pipeline
from operator import itemgetter

def show_most_informative_features(model, text=None, n=50):
    """
    Accepts a Pipeline with a classifier and a TfidfVectorizer and computes
    the n most informative features of the model. If text is given, then will
    compute the most informative features for classifying that text.

    Note that this function will only work on linear models with a coef_
    attribute (LogisticRegression, LinearSVC, and the like).
    """
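    # The preview ends inside the docstring; the lines below sketch a
    # plausible body, not the gist's own code. The named_steps keys are
    # assumptions about how the pipeline was built.
    vectorizer = model.named_steps['vectorizer']
    classifier = model.named_steps['classifier']
    coefs = sorted(
        zip(classifier.coef_[0], vectorizer.get_feature_names()),
        key=itemgetter(0),
        reverse=True,
    )
    if text is not None:
        # restrict the ranking to terms that actually occur in the text
        present = set(vectorizer.inverse_transform(vectorizer.transform([text]))[0])
        coefs = [pair for pair in coefs if pair[1] in present]
    for coef, feature in coefs[:n] + coefs[-n:]:
        print('{: 0.4f}  {}'.format(coef, feature))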
danemacaulay / build_model.py
Created November 7, 2017 15:38
build, evaluate, and save a scikit-learn pipeline
import os
import time
import string
import pickle
import pandas as pd
from operator import itemgetter
from nltk.corpus import stopwords as sw
from nltk.corpus import wordnet as wn
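
Only the imports survive in the preview; a minimal sketch of the kind of pipeline they point to (the step names, the train/test split, and the output path are assumptions, not from the gist):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

def build_and_save(X, y, path='model.pickle'):
    model = Pipeline([
        ('vectorizer', TfidfVectorizer(stop_words=sw.words('english'))),
        ('classifier', LogisticRegression()),
    ])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    start = time.time()
    model.fit(X_train, y_train)
    print('trained in {:.1f}s'.format(time.time() - start))
    print(classification_report(y_test, model.predict(X_test)))
    with open(path, 'wb') as f:
        pickle.dump(model, f)
    return model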