Dane Macaulay (danemacaulay)
from functools import wraps
import time
import inspect

def timing(f):
    @wraps(f)
    def wrapper(*args, **kwargs):
        module = inspect.getmodule(f).__name__
        start = time.time()
        result = f(*args, **kwargs)
        # The gist preview cuts off here; a likely continuation reports
        # the elapsed time and passes the result through:
        print('{}.{} took {:.3f}s'.format(module, f.__name__, time.time() - start))
        return result
    return wrapper
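
A quick usage sketch, assuming the completion above: decorating any function prints where it lives and how long it ran.

@timing
def slow_add(a, b):
    time.sleep(0.1)
    return a + b

slow_add(1, 2)  # prints e.g. "__main__.slow_add took 0.100s"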
danemacaulay / proxy.js
Created December 17, 2018 03:48
node proxy
const port = process.argv[2] || 7979
const targetPort = process.argv[3] || 5000
const path = process.argv[4] || 'client'
const express = require('express')
const proxy = require('http-proxy-middleware')
const app = express()
app.use(express.static(path))
app.use('/services', proxy({target: `http://0.0.0.0:${targetPort}`, changeOrigin: true}))
app.listen(port, () => console.log(`Listening at http://localhost:${port}`))
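
The positional arguments map to the listen port, the backend port, and the static directory, so node proxy.js 7979 5000 client serves ./client on port 7979 and forwards /services requests to the service on port 5000.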
function getAccumulatedList(list) {
  // Single pass: thread the running total and the output list through
  // one accumulator object, e.g. [1, 2, 3] -> [1, 3, 6]
  return list.reduce((accumulator, item) => {
    accumulator.count += item
    accumulator.acclist.push(accumulator.count)
    return accumulator
  }, {count: 0, acclist: []}).acclist
}
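
For example, getAccumulatedList([1, 2, 3]) returns [1, 3, 6]; carrying both the count and the list in one accumulator means the input is only walked once.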
danemacaulay / scheduler.py
Last active November 30, 2018 22:03
python background scheduler
import time
import threading
import datetime

class Scheduler(object):
    def __init__(self, hours, job):
        # total_seconds(), not .seconds: .seconds discards whole days,
        # so a 24+ hour interval would silently wrap to near zero
        self.interval = int(datetime.timedelta(hours=hours).total_seconds())
        self.job = job
        thread = threading.Thread(target=self.run, args=())
        thread.daemon = True  # don't block process exit
        thread.start()

    def run(self):  # likely continuation; the preview cuts off above
        while True:
            self.job()
            time.sleep(self.interval)
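
A minimal usage sketch, assuming the completed class above (the job itself is made up):

def heartbeat():
    print('still alive at', datetime.datetime.now())

Scheduler(hours=1, job=heartbeat)
# the worker is a daemon thread, so keep the main thread busy or it exits immediately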
import ast
from Crypto.Cipher import PKCS1_OAEP
from Crypto.PublicKey import RSA

with open('cert/referral', 'rb') as f:
    key_text = f.read()
privkey = RSA.importKey(key_text)
publickey = privkey.publickey()
encryptor = PKCS1_OAEP.new(publickey)
decryptor = PKCS1_OAEP.new(privkey)

# The preview ends at the def; given the ast import, a plausible pair of
# bodies stringifies the ciphertext bytes and reverses it with literal_eval:
def encrypt(msg):
    return str(encryptor.encrypt(msg))

def decrypt(msg_text):
    return decryptor.decrypt(ast.literal_eval(msg_text))
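
A round trip under those assumed bodies (Python 3):

token = encrypt(b'hello')          # ciphertext rendered as a bytes literal
assert decrypt(token) == b'hello'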
danemacaulay / indexer.py
Last active January 4, 2018 19:11
Stream through remote common crawl index file to search for WARC entries by URL
import sys
import requests
import zlib
import json
from urllib.parse import urlparse
from collections import Counter
path = sys.argv[1]
url = 'https://commoncrawl.s3.amazonaws.com/{}'.format(path)
google_netloc = 'www.google.com'
google_path = '/maps/place'
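
The preview stops after the setup; below is a sketch of a plausible streaming loop (Python 3.6+, helper name is mine, and it assumes the standard CDX shard layout of a key, a timestamp, and a trailing JSON blob per line).

def iter_index_lines(index_url):
    # Stream the gzipped index without holding it all in memory
    # (handles a single gzip member; the 32 flag tolerates the gzip header).
    decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
    pending = b''
    resp = requests.get(index_url, stream=True)
    for chunk in resp.iter_content(chunk_size=2 ** 16):
        pending += decompressor.decompress(chunk)
        *lines, pending = pending.split(b'\n')
        for line in lines:
            yield line

counts = Counter()
for line in iter_index_lines(url):
    record = json.loads(line[line.index(b'{'):])  # trailing JSON blob
    parsed = urlparse(record['url'])
    if parsed.netloc == google_netloc and parsed.path.startswith(google_path):
        counts[parsed.path] += 1
        print(record)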
danemacaulay / warc_fetcher.py
Created January 4, 2018 15:19
Direct to STDOUT all warc data on a particular domain using index.commoncrawl.org
import gzip
import json
import requests
from StringIO import StringIO

def get_page_count(searchString):
    url = 'http://index.commoncrawl.org/CC-MAIN-2017-51-index?url={}&output=json&showNumPages=true'.format(searchString)
    resp = requests.get(url)
    return json.loads(resp.content)['pages']
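
The rest of the gist isn't shown; a plausible driver in the same Python 2 style as the StringIO import would page through the index (the filename/offset/length fields come from the index.commoncrawl.org JSON output) and write each record's byte range to STDOUT:

import sys

def dump_warc_records(searchString):
    base = 'http://index.commoncrawl.org/CC-MAIN-2017-51-index'
    for page in range(get_page_count(searchString)):
        resp = requests.get(base, params={'url': searchString, 'output': 'json', 'page': page})
        for line in resp.content.splitlines():
            entry = json.loads(line)
            start = int(entry['offset'])
            end = start + int(entry['length']) - 1
            warc = requests.get(
                'https://commoncrawl.s3.amazonaws.com/' + entry['filename'],
                headers={'Range': 'bytes={}-{}'.format(start, end)})
            # each range is a standalone gzip member holding one WARC record
            sys.stdout.write(gzip.GzipFile(fileobj=StringIO(warc.content)).read())

dump_warc_records('example.com/*')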
import java.util.HashSet;
import java.util.Set;
import com.google.i18n.phonenumbers.PhoneNumberMatch;
import com.google.i18n.phonenumbers.PhoneNumberUtil;

// Collect the raw text of every phone number found in `source`
Set<String> phones = new HashSet<>();
PhoneNumberUtil util = PhoneNumberUtil.getInstance();
for (PhoneNumberMatch match : util.findNumbers(source, null)) {
    phones.add(match.rawString());
}
danemacaulay / show_informative_text_features.py
Created November 7, 2017 15:45
show important features of a text classifier pipeline
from operator import itemgetter

def show_most_informative_features(model, text=None, n=50):
    """
    Accepts a Pipeline with a classifier and a TfidfVectorizer and computes
    the n most informative features of the model. If text is given, then will
    compute the most informative features for classifying that text.

    Note that this function will only work on linear models with a coef_
    attribute (LogisticRegression, LinearSVC, and the like).
    """
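    # The preview ends inside the docstring; the lines below sketch a
    # plausible body, not the gist's own code. The named_steps keys are
    # assumptions about how the pipeline was built.
    vectorizer = model.named_steps['vectorizer']
    classifier = model.named_steps['classifier']
    coefs = sorted(
        zip(classifier.coef_[0], vectorizer.get_feature_names()),
        key=itemgetter(0),
        reverse=True,
    )
    if text is not None:
        # restrict the ranking to terms that actually occur in the text
        present = set(vectorizer.inverse_transform(vectorizer.transform([text]))[0])
        coefs = [pair for pair in coefs if pair[1] in present]
    for coef, feature in coefs[:n] + coefs[-n:]:
        print('{: 0.4f}  {}'.format(coef, feature))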
danemacaulay / build_model.py
Created November 7, 2017 15:38
build, evaluate, and save a scikit-learn pipeline
import os
import time
import string
import pickle
import pandas as pd
from operator import itemgetter
from nltk.corpus import stopwords as sw
from nltk.corpus import wordnet as wn
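
Only the imports survive in the preview; a minimal sketch of the kind of pipeline they point to (the step names, the train/test split, and the output path are assumptions, not from the gist):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

def build_and_save(X, y, path='model.pickle'):
    model = Pipeline([
        ('vectorizer', TfidfVectorizer(stop_words=sw.words('english'))),
        ('classifier', LogisticRegression()),
    ])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    start = time.time()
    model.fit(X_train, y_train)
    print('trained in {:.1f}s'.format(time.time() - start))
    print(classification_report(y_test, model.predict(X_test)))
    with open(path, 'wb') as f:
        pickle.dump(model, f)
    return model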