Skip to content

Instantly share code, notes, and snippets.

@jschaub30
Last active October 22, 2015 18:40
Show Gist options
  • Save jschaub30/be53fa1dc30642186021 to your computer and use it in GitHub Desktop.
Save jschaub30/be53fa1dc30642186021 to your computer and use it in GitHub Desktop.
Python script that emails users who are tagged in posts or comments on a WordPress blog
#!/usr/bin/python
import datetime
import re
import smtplib
import sys
import time
from email.mime.text import MIMEText

import feedparser
import requests
from bs4 import BeautifulSoup
# Dry-run switch: when True, notify_user() builds the message but does not
# send it, and get_subscribers() prints every scraped user.
debug_flag = False
def main(blog_url, username, password):
    """Notify every subscriber tagged in a post or comment since the last run.

    Reads the timestamp of the previous run, scrapes the subscriber list,
    fetches the blog's post feed ('/?feed=rss2') and comment feed
    ('/?feed=comments-rss2'), and calls notify_user() for each subscriber
    whose '@handle' tag appears in an entry published after that timestamp.
    Finally the timestamp file is rewritten with the current time.

    Args:
        blog_url: Base URL of the blog; a trailing '/' is ignored.
        username: Login name passed through to get_subscribers().
        password: Password passed through to get_subscribers().
    """
    blog_url = blog_url.rstrip('/')
    oldTimeStamp = read_old_timestamp(blog_url)
    subscribers = get_subscribers(blog_url, username, password)
    sys.stdout.write("\n============================\nScraping %s\n" % blog_url)
    sys.stdout.write("\nAnalyzing posts and comments since %s\n" % str(oldTimeStamp))
    posts = feedparser.parse(blog_url + '/?feed=rss2')
    comments = feedparser.parse(blog_url + '/?feed=comments-rss2')

    # Build one matcher per subscriber up front instead of re-deriving the
    # tag string for every (entry, subscriber) pair.
    matchers = [(handle, email, _tag_pattern(handle))
                for handle, email in subscribers.items()]

    for post in posts['entries'] + comments['entries']:
        # Encode explicitly so non-ASCII titles do not break a piped stdout
        # under Python 2.
        sys.stdout.write('Analyzing "%s": %s\n' % (
            post['title'].encode('utf-8'),
            post['published'].encode('utf-8')))

        # Entries at or before the last run were already handled.
        if not oldTimeStamp < _published_utc(post):
            continue

        # The entry text does not depend on the subscriber, so fetch and
        # lower-case it once per entry.
        if 'content' in post:
            content = post['content'][0]['value']
        else:
            content = post.summary
        content = content.lower()

        for handle, email, pattern in matchers:
            if pattern.search(content):
                notify_user(blog_url, handle, email, post)
    write_new_timestamp(blog_url)


def _tag_pattern(handle):
    """Compile a regex matching '@handle' as a whole tag in lower-cased text."""
    tag = '@' + handle.replace(' ', '-').lower()
    # The lookahead rejects longer handles that merely start with this one
    # (e.g. '@ann' must not fire on '@anna' or '@ann-lee').
    return re.compile(re.escape(tag) + r'(?![\w-])', re.UNICODE)


def _published_utc(post):
    """Return the entry's publish time as a naive UTC datetime."""
    parsed = post.get('published_parsed')
    if parsed is not None:
        # feedparser normalises this struct_time to UTC, which matches the
        # UTC timestamp file written by write_new_timestamp().
        return datetime.datetime(*parsed[:6])
    # Fallback: drop the trailing timezone token and parse the rest as-is.
    stamp = ' '.join(post['published'].split()[:-1])
    return datetime.datetime.strptime(stamp, '%a, %d %b %Y %H:%M:%S')
def notify_user(blog_url, handle, email, post):
    """Email one subscriber that they were tagged in a post or comment.

    The message is sent through the SMTP server on localhost. When the
    module-level debug_flag is true the message is built but not sent.
    Delivery failures are reported on stdout and do not raise.

    Args:
        blog_url: Base URL of the blog; its last path segment is used in
            the subject line.
        handle: Subscriber's user name, used in the greeting.
        email: Recipient address.
        post: Feed entry providing 'link', 'published' and 'title'.
    """
    sys.stdout.write("!! Found user %s tagged in %s\n" % (handle, post['link']))
    sender = 'noreply@arlab093.austin.ibm.com'
    body = (
        'Greetings %s,\n\nOn %s, you were tagged in '
        'this post on the spark blog:\n\ntitle:\t"%s"\nurl:\t%s'
        '\n\nTo disable these notifications, please email \n'
        'Jeremy (schaubj@us.ibm.com).\n\n'
    ) % (handle, post['published'], post['title'], post['link'])
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = sender
    msg['To'] = email
    msg['Subject'] = blog_url.split('/')[-1] + " blog: you have been tagged"

    if debug_flag:
        return

    # Best effort: one failed delivery must not abort the remaining
    # notifications, so any error is reported and swallowed here.
    try:
        smtpObj = smtplib.SMTP('localhost')
        try:
            smtpObj.sendmail(sender, [email], msg.as_string())
        finally:
            # Always release the connection, even when sending fails.
            smtpObj.quit()
        sys.stdout.write("Successfully sent email\n")
    except Exception as err:
        # write() needs a string; passing the exception object itself
        # raises TypeError and hides the real problem.
        sys.stdout.write("%s\n" % err)
        sys.stdout.write("Error: unable to send email\n")
        sys.stdout.write('Run "sudo postfix start"\n')
def write_new_timestamp(blog_url):
    """Record the current UTC time as the last-run timestamp for a blog.

    The file is created in the current working directory. Its name is the
    lower-cased blog URL with 'http://' removed and every '/' replaced by
    '.', followed by '.timestamp'; read_old_timestamp() derives the same
    name, so the two must stay in sync.

    Args:
        blog_url: Base URL of the blog.
    """
    fn = blog_url.replace('http://', '').replace('/', '.').lower() + '.timestamp'
    # The context manager closes the file even if the write fails.
    with open(fn, 'w') as f:
        f.write(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
def read_old_timestamp(blog_url):
    """Return the time of the previous run for a blog.

    The timestamp is read from the file written by write_new_timestamp().
    If that file is missing, unreadable, or does not contain a valid
    timestamp, a fresh one is written and the process exits, so the next
    run only reports entries published after this one.

    Args:
        blog_url: Base URL of the blog.

    Returns:
        datetime.datetime parsed from the first line of the timestamp file.

    Raises:
        SystemExit: when no usable timestamp exists yet.
    """
    fn = blog_url.replace('http://', '').replace('/', '.').lower() + '.timestamp'
    try:
        with open(fn, 'r') as f:
            return datetime.datetime.strptime(
                f.readline().strip(), '%Y-%m-%d %H:%M:%S')
    except (IOError, ValueError):
        # ValueError covers an empty or corrupt file (e.g. an interrupted
        # write); without it every later run would crash on the same file.
        write_new_timestamp(blog_url)
        sys.stdout.write(
            "No usable timestamp for %s; recorded a new one, exiting\n" % blog_url)
        sys.exit()
def get_subscribers(blog_url, username, password):
    """Scrape the blog's admin user list into a {user name: email} dict.

    Logs in through wp-login.php, reads the total user count from the
    'All (N)' filter link on wp-admin/users.php, and then walks the paged
    user table until that many users have been collected.

    Args:
        blog_url: Base URL of the blog, without a trailing '/'.
        username: Account used to log in.
        password: Password for that account.

    Returns:
        dict mapping each user name to the text of that row's third cell
        (presumably the email column - verify against the admin page layout).

    Raises:
        SystemExit: when the user count cannot be found on the page, which
            is treated as a failed login.
    """
    subscribers = dict()
    payload = {
        'action': 'login',
        'log': username,
        'pwd': password
    }
    session = requests.session()
    session.post('%s/wp-login.php' % blog_url, data=payload)
    users_url = '%s/wp-admin/users.php' % blog_url
    soup = BeautifulSoup(session.get(users_url).text, 'html.parser')
    try:
        num_subs = int(soup.find_all('li', class_="all")[0].span.text.split('(')[1].split(')')[0])
    except (IndexError, AttributeError):
        # A page without the 'All (N)' link (or without its <span>) is not
        # the user list, so the login evidently did not succeed.
        sys.stderr.write("Authentication error\n")
        sys.exit(1)

    page = 1  # the un-paged request above already returned the first page
    while len(subscribers) < num_subs:
        found_before = len(subscribers)
        for row in soup.find_all('tr')[1:]:  # first row is header
            cells = row.find_all('td')
            try:
                # Distinct local names: do not shadow the username parameter.
                name = cells[0].find_all('a')[0].text
                address = cells[2].text
            except IndexError:
                continue  # not a user row
            subscribers[name] = address

        if len(subscribers) >= num_subs:
            break
        if len(subscribers) == found_before:
            # This page added nothing new; asking for more pages would
            # loop forever, so settle for what was found.
            sys.stderr.write("Only found %d out of %d subscribers\n" % (
                len(subscribers), num_subs))
            break

        sys.stdout.write("Found %d out of %d subscribers\n" % (
            len(subscribers), num_subs))
        page += 1
        url = '%s?paged=%d' % (users_url, page)
        sys.stdout.write("Get(%s)\n" % url)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

    if debug_flag:
        for handle in subscribers:
            sys.stdout.write("Found user %s (%s)\n" % (handle, subscribers[handle]))
    return subscribers
def usage():
    """Write the command-line synopsis to stdout."""
    # Terminate the line so the shell prompt does not end up glued to it.
    sys.stdout.write('blog_notifier.py <blog_url> <username> <password>\n')
if __name__ == '__main__':
    # sys.argv holds the script name plus three required arguments, so
    # anything shorter than four items cannot supply sys.argv[3].
    # NOTE(review): the password is passed on the command line and is
    # therefore visible in the process list.
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment