This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RSA helper class for pycrypto
# Copyright (c) Dennis Lee
# Date: 21 Mar 2017
# Description:
# Python helper class to perform RSA encryption, decryption,
# signing, signature verification, and key generation
# Dependency packages:
# pycrypto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install PhantomJS 2.1.1 (linux-x86_64 build) under /usr/local on an
# apt-based system.

# Packages installed before PhantomJS is fetched.
sudo apt-get update
sudo apt-get install build-essential chrpath libssl-dev libxft-dev -y
sudo apt-get install libfreetype6 libfreetype6-dev -y
sudo apt-get install libfontconfig1 libfontconfig1-dev -y

# Download and unpack the release archive in the home directory.
cd ~
export PHANTOM_JS="phantomjs-2.1.1-linux-x86_64"
# Expansions are quoted so the paths stay single words.
wget "https://github.com/Medium/phantomjs/releases/download/v2.1.1/$PHANTOM_JS.tar.bz2"
sudo tar xvjf "$PHANTOM_JS.tar.bz2"

# Move the unpacked tree to /usr/local/share and link the binary into
# /usr/local/bin; -f replaces an existing link on re-runs.
sudo mv "$PHANTOM_JS" /usr/local/share
sudo ln -sf "/usr/local/share/$PHANTOM_JS/bin/phantomjs" /usr/local/bin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import string | |
import pickle | |
from operator import itemgetter | |
from nltk.corpus import stopwords as sw | |
from nltk.corpus import wordnet as wn | |
from nltk import wordpunct_tokenize |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re, string | |
from unidecode import unidecode | |
# Pattern matching any single character of string.punctuation. re.escape keeps
# characters such as ']', '\\', '^' and '-' literal inside the character class.
PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))
class Fingerprinter(object): | |
''' | |
Python implementation of Google Refine fingerprinting algorithm described here: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import codecs | |
import os | |
import sys | |
import numpy as np | |
from sklearn.feature_extraction.text import TfidfVectorizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parse a URL by assigning it to an anchor element's href and then reading
// the element's URL component properties.
var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";

// Each property below exposes one component of the URL assigned above.
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port;     // => "3000"
parser.pathname; // => "/pathname/"
parser.search;   // => "?search=test"
parser.hash;     // => "#hash"
parser.host;     // => "example.com:3000"