Skip to content

Instantly share code, notes, and snippets.

@nicolewhite
Created August 29, 2014 16:42
Show Gist options
  • Save nicolewhite/167828e51d8f2b6fad75 to your computer and use it in GitHub Desktop.
Save nicolewhite/167828e51d8f2b6fad75 to your computer and use it in GitHub Desktop.
Get politicians' tweets and put them into a CSV file.
import tweepy
from tweepy import Cursor
import unicodecsv
from unidecode import unidecode
# Twitter API credentials. Fill these in before running the script.
consumer_key, consumer_secret = "", ""
access_key, access_secret = "", ""

# Build the OAuth handler from the credentials and create the API client
# that the rest of the script uses for every request.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
# Twitter screen names of the accounts whose timelines are collected.
users = [
    "lisamurkowski",
    "SenJohnMcCain",
    "JeffFlake",
    "SenMarkPryor",
    "JohnBoozman",
    "SenFeinstein",
    "SenatorBoxer",
    "MarkUdall",
    "SenBennetCO",
    "ChrisMurphyCT",
    "SenBlumenthal",
    "SenatorCarper",
    "ChrisCoons",
    "marcorubio",
    "SaxbyChambliss",
    "brianschatz",
    "maziehirono",
    "MikeCrapo",
    "SenatorRisch",
    "SenatorDurbin",
    "SenDonnelly",
    "SenDanCoats",
    "ChuckGrassley",
    "SenatorHarkin",
    "SenPatRoberts",
    "JerryMoran",
    "SenRandPaul",
    "SenLandrieu",
    "DavidVitter",
    "SenatorBarb",
    "MarkeyMemo",
    "stabenow",
    "amyklobuchar",
    "SenatorWicker",
    "clairecmc",
    "RoyBlunt",
    "jontester",
    "SenatorFischer",
    "SenatorReid",
    "SenDeanHeller",
    "SenatorShaheen",
    "kellyayotte",
    "CoryBooker",
    "SenatorMenendez",
    "MartinHeinrich",
    "SenatorTomUdall",
    "SenSchumer",
    "SenGillibrand",
    "SenatorBurr",
    "SenatorHagan",
    "SenatorHeitkamp",
    "SenJohnHoeven",
    "SenSherrodBrown",
    "robportman",
    "jiminhofe",
    "RonWyden",
    "SenToomey",
    "SenJackReed",
    "SenWhitehouse",
    "GrahamBlog",
    "SenatorTimScott",
    "SenJohnThune",
    "SenTedCruz",
    "SenMikeLee",
    "SenatorLeahy",
    "timkaine",
    "SenRockefeller",
    "Sen_JoeManchin",
    "SenRonJohnson",
    "SenatorEnzi",
    "SenJohnBarrasso",
]
def _entity_values(entities, key, field):
    """Return the ASCII-transliterated ``field`` of each ``key`` entity.

    ``entities`` is a tweet's ``entities`` dictionary. A missing or empty
    ``key`` yields an empty list.
    """
    return [unidecode(item[field]) for item in entities.get(key) or []]


# Collect profile data and up to 1000 recent tweets for every account in
# `users`, writing one CSV row per tweet. The file is opened in binary mode
# because the unicodecsv writer emits encoded bytes.
with open('tweets.csv', 'wb') as csv_file:
    writer = unicodecsv.writer(csv_file, delimiter=',', quotechar='"')

    # Write header row.
    writer.writerow(["politician_name",
                     "politician_username",
                     "politician_followers_count",
                     "politician_listed_count",
                     "politician_following",
                     "politician_favorites",
                     "politician_verified",
                     "politician_default_profile",
                     "politician_location",
                     "politician_time_zone",
                     "politician_statuses_count",
                     "politician_description",
                     "politician_geo_enabled",
                     "politician_contributors_enabled",
                     "tweet_year",
                     "tweet_month",
                     "tweet_day",
                     "tweet_hour",
                     "tweet_text",
                     "tweet_lat",
                     "tweet_long",
                     "tweet_source",
                     "tweet_in_reply_to_screen_name",
                     "tweet_direct_reply",
                     "tweet_retweet_status",
                     "tweet_retweet_count",
                     "tweet_favorite_count",
                     "tweet_hashtags",
                     "tweet_hashtags_count",
                     "tweet_urls",
                     "tweet_urls_count",
                     "tweet_user_mentions",
                     "tweet_user_mentions_count",
                     "tweet_media_type",
                     "tweet_contributors"])

    for user in users:
        # Look the account up explicitly by screen name.
        user_obj = api.get_user(screen_name=user)

        # Gather info specific to the current user; repeated on every row.
        user_info = [user_obj.name,
                     user_obj.screen_name,
                     user_obj.followers_count,
                     user_obj.listed_count,
                     user_obj.friends_count,
                     user_obj.favourites_count,
                     user_obj.verified,
                     user_obj.default_profile,
                     user_obj.location,
                     user_obj.time_zone,
                     user_obj.statuses_count,
                     user_obj.description,
                     user_obj.geo_enabled,
                     user_obj.contributors_enabled]

        # Get the 1000 most recent tweets for the current user.
        for tweet in Cursor(api.user_timeline, screen_name=user).items(1000):
            # Coordinates are a two-element array inside a dictionary, with
            # longitude at index 0 and latitude at index 1.
            if tweet.coordinates is not None:
                point = tweet.coordinates['coordinates']
                lat, lon = point[1], point[0]
            else:
                lat = lon = None

            # A tweet that is not a reply has no reply screen name (the API
            # reports null/None, not ""), so test truthiness rather than
            # comparing against the empty string.
            direct_reply = bool(tweet.in_reply_to_screen_name)

            # Retweets start with "RT ..."
            retweet_status = tweet.text.startswith("RT ")

            # Get info specific to the current tweet of the current user.
            tweet_info = [tweet.created_at.year,
                          tweet.created_at.month,
                          tweet.created_at.day,
                          tweet.created_at.hour,
                          unidecode(tweet.text),
                          lat,
                          lon,
                          tweet.source,
                          tweet.in_reply_to_screen_name,
                          direct_reply,
                          retweet_status,
                          tweet.retweet_count,
                          tweet.favorite_count]

            # Entities are variable-length lists of dictionaries, if present.
            hashtags = _entity_values(tweet.entities, 'hashtags', 'text')
            urls = _entity_values(tweet.entities, 'urls', 'url')
            user_mentions = _entity_values(tweet.entities, 'user_mentions',
                                           'screen_name')
            media = _entity_values(tweet.entities, 'media', 'type')

            # `contributors` is None when the tweet has none.
            contributors = [unidecode(contributor['screen_name'])
                            for contributor in tweet.contributors or []]

            more_tweet_info = [', '.join(hashtags),
                               len(hashtags),
                               ', '.join(urls),
                               len(urls),
                               ', '.join(user_mentions),
                               len(user_mentions),
                               ', '.join(media),
                               ', '.join(contributors)]

            # Write data to CSV.
            writer.writerow(user_info + tweet_info + more_tweet_info)

        # Show progress once all of this user's tweets are written.
        print("Wrote tweets by %s to CSV." % user)
@andersolarsson
Copy link

Excellent, thanks for this. I was wondering if you knew how to change the length of the "tweet_text" part of the output so that the entire tweet is visible in the CSV. Also, do you know of a method similar to the one presented in the code here that would capture tweets based not on accounts but on search terms such as hashtags?

@DavidZamoraR
Copy link

Excellent contribution. Thank you very much NicoleWhite
How do I get more than 3200 tweets in the same .csv file?

I understand that tweepy has a limit on the number of requests every fifteen minutes. However, I'd like to get more tweets in the same file.

If you can't do that, how do I extract the last 3200 tweets and then in another file the remaining ones without repeating?

@Lavarider
Copy link

This is awesome, thank you so much! How would I add a try-except function to catch errors when either a user has been removed or I reached my API limit? I want to run this for hours on end without having to check on it.

@birdcharlie68
Copy link

I have written the code and run it in my Mac terminal. A CSV file has been created, but it contains only the headings and no tweets. Could anyone tell me why this is?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment