One Paragraph of project description goes here
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
date | value | |
---|---|---|
2013-01 | 53 | |
2013-02 | 165 | |
2013-03 | 269 | |
2013-04 | 344 | |
2013-05 | 376 | |
2013-06 | 410 | |
2013-07 | 421 | |
2013-08 | 405 | |
2013-09 | 376 |
import glob
import math

line = ''
s = set()
# Collect every .txt file in the corpus directory.
flist = glob.glob(r'E:\PROGRAMMING\PYTHON\programs\corpus2\*.txt')
# Open each file, tokenize its content, and accumulate the tokens in a set.
for fname in flist:
    # The context manager closes the file even if read() raises.
    with open(fname, "r") as tfile:
        line = tfile.read()  # whole file content as a single string
    # NOTE: split(' ') breaks on single spaces only, so newlines and tabs stay
    # attached to neighbouring tokens and runs of spaces produce '' tokens.
    s.update(line.split(' '))  # add this file's distinct tokens to the set
# Given a list of words, remove any that are
# in a list of stop words.
def removeStopwords(wordlist, stopwords):
    """Return the words of ``wordlist`` that do not appear in ``stopwords``.

    Args:
        wordlist: Iterable of words to filter.
        stopwords: Container tested with ``in`` for each word.

    Returns:
        A new list holding the kept words in their original order;
        duplicates in ``wordlist`` are preserved.
    """
    return [w for w in wordlist if w not in stopwords]
import math | |
from text.blob import TextBlob as tb | |
def tf(word, blob):
    """Return the term frequency of ``word`` in ``blob``.

    The value is the number of times ``word`` occurs in ``blob.words``
    divided by the total number of entries in ``blob.words``.

    Args:
        word: The term to count.
        blob: Any object exposing a ``words`` sequence that supports
            ``count()`` and ``len()``.

    Returns:
        The relative frequency of ``word``; ``0.0`` when ``blob.words`` is
        empty, which would otherwise raise ``ZeroDivisionError``.
    """
    words = blob.words  # read the attribute once; both steps below need it
    total = len(words)
    if not total:
        return 0.0
    return words.count(word) / total
def n_containing(word, bloblist):
    """Return how many items of ``bloblist`` contain ``word``.

    Args:
        word: The term to look for.
        bloblist: Iterable of documents; each one is tested with
            ``word in blob``.

    Returns:
        The count of documents for which the membership test is true
        (``0`` for an empty ``bloblist``).
    """
    # NOTE(review): the meaning of ``word in blob`` depends on the blob type;
    # for string-like blobs it is a substring test rather than a whole-word
    # match -- confirm this is intended.
    return sum(1 for blob in bloblist if word in blob)
def idf(word, bloblist): |
import glob
import math

line = ''
s = set()  # start from an empty vocabulary
# Collect every .txt file in the corpus directory.
flist = glob.glob(r'E:\PROGRAMMING\PYTHON\programs\corpus2\*.txt')
# Open each file, tokenize its content, and accumulate the tokens in a set.
for fname in flist:
    # The context manager closes the file even if read() raises.
    with open(fname, "r") as tfile:
        line = tfile.read()  # whole file content as a single string
    # NOTE: split(' ') breaks on single spaces only, so newlines and tabs stay
    # attached to neighbouring tokens and runs of spaces produce '' tokens.
    s.update(line.split(' '))  # add this file's distinct tokens to the set