Instantly share code, notes, and snippets.
Created
February 13, 2021 16:06
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save omendezmorales/1e6e0a2f3ae646a58492c0f541c3ed80 to your computer and use it in GitHub Desktop.
Project 2 Udacity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import time

import numpy as np
import pandas as pd
# Maps each supported city name (lower-case) to the CSV file that load_data
# reads for it.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Month names accepted as filters; a name's position + 1 is its month number.
months = ['january', 'february', 'march', 'april', 'may', 'june']
def _ask_choice(prompt, choices, input_fn):
    """Ask with *prompt* until the reply is in *choices*; return it stripped and lower-cased."""
    while True:
        reply = input_fn(prompt).strip().lower()
        if reply in choices:
            return reply
        print('"{}" is not a valid choice.'.format(reply))


def get_filters(input_fn=None):
    """
    Asks user to specify a city, month, and day to analyze.

    Replies are matched case-insensitively and ignoring surrounding
    whitespace; each question is repeated until a valid reply is given.

    Args:
        (callable) input_fn - optional replacement for the built-in input();
                              called with the prompt text and must return the
                              reply as a string. Defaults to input().
    Returns:
        (str) city - lower-case name of the city to analyze
        (str) month - lower-case name of the month to filter by, or "all" to apply no month filter
        (str) day - lower-case name of the day of week to filter by, or "all" to apply no day filter
    """
    if input_fn is None:
        input_fn = input

    # Accepted replies, spelled out here so they stay in step with the prompts.
    city_choices = ('chicago', 'new york city', 'washington')
    month_choices = ('january', 'february', 'march', 'april', 'may', 'june',
                     'all')
    day_choices = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                   'saturday', 'sunday', 'all')

    print('Hello! Let\'s explore some US bikeshare data!')
    ci_msg = 'Please enter the city you want to explore data from: '
    mo_msg = 'enter name of the month to filter by (january, february, ... , june), or "all" to apply no month filter: '
    da_msg = 'enter name of the day of week to filter by( monday, tuesday, ... sunday), or "all" to apply no day filter: '

    city = _ask_choice(ci_msg, city_choices, input_fn)
    month = _ask_choice(mo_msg, month_choices, input_fn)
    day = _ask_choice(da_msg, day_choices, input_fn)

    print('using city: {}, month: {}, and day: {}\n'.format(city, month, day))
    print('-'*40)
    return city, month, day
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The file is looked up next to this script. Besides the CSV columns, the
    returned frame carries four derived columns: 'Start_Time' (datetime),
    'month' (month number), 'day_of_week' (day name) and 'start_hr' (hour).

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             or None when the data could not be loaded (the error is printed)
    """
    print('\nLoading data...\n')
    start_time = time.time()
    try:
        # Anchor on the absolute script location: a bare dirname can be ''
        # and would turn the path into '/<file>.csv'.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(script_dir, CITY_DATA[city.strip().lower()])
        df = pd.read_csv(file_path)

        # convert the Start Time column to datetime
        started = pd.to_datetime(df['Start Time'])
        df['Start_Time'] = started
        # extract month, day of week and hour with the vectorized accessor
        df['month'] = started.dt.month
        df['day_of_week'] = started.dt.day_name()
        df['start_hr'] = started.dt.hour

        # filter by month if applicable; normalize first so "January" works
        month_key = month.strip().lower()
        if month_key != 'all':
            df = df[df['month'] == months.index(month_key) + 1]

        # filter by day of week if applicable
        day_key = day.strip().lower()
        if day_key != 'all':
            df = df[df['day_of_week'] == day_key.title()]
    except (OSError, KeyError, ValueError) as e:
        # Missing/unreadable file, unknown city or column, unknown month name,
        # or unparsable contents: report it and signal failure with None.
        print("Error occurred: {}".format(e))
        return None

    print("\nLoad took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)
    return df
def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Args:
        df - Pandas DataFrame with 'month', 'day_of_week' and 'start_hr' columns
    Returns:
        None; the most common month, day of week and start hour are printed.
        An empty frame prints a notice instead of statistics.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report
        print('No trips to analyze.')
    else:
        # display the most common month
        most_common_month = int(df['month'].mode().iloc[0])
        if 1 <= most_common_month <= len(months):
            month_label = months[most_common_month - 1]
        else:
            # month outside the named range: show the number instead of failing
            month_label = most_common_month
        print('Most common month: {}'.format(month_label))

        # display the most common day of week
        most_common_day = df['day_of_week'].mode().iloc[0]
        print('Most common day: {}'.format(most_common_day))

        # display the most common start hour
        most_common_hour = df['start_hr'].mode().iloc[0]
        print('Most common start hr: {}'.format(most_common_hour))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)
def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    The frame passed in is only read; no columns are added to it.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    Returns:
        None; the most used start station, end station and start/end pair
        (with its trip count) are printed.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        print('No trips to analyze.')
    else:
        # display most commonly used start station
        most_pop_start_st = df['Start Station'].mode().iloc[0]
        print('Most commonly used start station: "{}"'.format(most_pop_start_st))

        # display most commonly used end station
        most_pop_end_st = df['End Station'].mode().iloc[0]
        print('Most commonly used end station: "{}"'.format(most_pop_end_st))

        # display most frequent combination of start station and end station.
        # Counting the pair as a pair keeps station names intact even when
        # they contain '-', which joining and re-splitting text would break.
        trip_counts = df.groupby(['Start Station', 'End Station']).size()
        if trip_counts.empty:
            print('No complete start/end station pairs to analyze.')
        else:
            start_station, end_station = trip_counts.idxmax()
            print('Most frequent start/end station combination is "{}" - "{}" with {} trips'
                  .format(start_station, end_station, trip_counts.max()))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)
def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    'Trip Duration' is divided by 60 and truncated to whole minutes before
    printing.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    Returns:
        None; total, mean and longest travel time are printed. When there
        are no durations a notice is printed instead.
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration'].dropna()
    if durations.empty:
        # mean()/max() would be NaN here and int(NaN) raises
        print('No trip durations to analyze.')
    else:
        # display total travel time
        total_travel_minutes = int(durations.sum() / 60)
        print('Total travel time is {:,} minutes.'.format(total_travel_minutes))

        # display mean travel time
        mean_travel_minutes = int(durations.mean() / 60)
        print('Mean travel time is {:,} minutes.'.format(mean_travel_minutes))

        # display longest travel time
        longest_trip = int(durations.max() / 60)
        print('Longest travel time is {:,} minutes.'.format(longest_trip))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)
def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Birth-year figures are shown only when the frame has a 'Birth Year'
    column, and gender counts only when it has a 'Gender' column. Birth
    years of 1900 or earlier are ignored for the birth-year figures only;
    user-type and gender counts always cover every row.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Birth Year' and 'Gender' columns
    Returns:
        None; the statistics are printed.
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    if 'Birth Year' in df:
        # Getting rid of users "born" in the XIX century. The filter is kept
        # in its own Series so rows without a usable birth year still count
        # towards the user-type and gender totals below (missing values
        # compare False against 1900 and are dropped here as well).
        birth_years = df['Birth Year']
        birth_years = birth_years[birth_years.gt(1900)]
        if birth_years.empty:
            print('No valid birth years to analyze.')
        else:
            youngest_birth_year = int(birth_years.max())
            oldest_birth_year = int(birth_years.min())
            most_common_year = int(birth_years.mode().iloc[0])
            # Display earliest, most recent, and most common year of birth
            print('\nThe youngest biker was born in {},\n'
                  'the oldest biker was born in {},\n'
                  'and the most common year of birth is {}.\n'
                  .format(youngest_birth_year, oldest_birth_year,
                          most_common_year))

    # Display counts of user types
    print('Counts per user types:\n {}'
          .format(df['User Type'].value_counts(sort=True)))

    # Display counts of gender, provided there's information in the dataframe
    if 'Gender' in df:
        print('\nCounts per gender:\n {}'
              .format(df['Gender'].value_counts(sort=True)))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)
def choose_stats(df):
    """
    Give the user the option to choose one stat type or all.

    Prints the menu, repeats the question until the reply is 0-4 or "all"
    (case-insensitive), then runs the chosen report(s) on df.

    Args:
        df - Pandas DataFrame passed on to the selected statistics function(s)
    Returns:
        (bool) True when the user pressed 0 to restart (no statistics are
               shown), False otherwise. The restart itself is left to the
               caller; starting a new session from here would nest one
               session inside another.
    """
    print('To see time statistics, press 1')
    print('To see station statistics, press 2')
    print('To see trip duration statistics, press 3')
    print('To see user statistics, press 4')
    print('To restart, press 0')

    menu_msg = 'Choose the statistics you want to see, or "all": '
    valid_options = ('0', '1', '2', '3', '4', 'all')
    menu_option = input(menu_msg).strip().lower()
    while menu_option not in valid_options:
        menu_option = input(menu_msg).strip().lower()

    if menu_option == '0':
        return True

    # Menu key -> report, in display order; "all" runs every report in turn.
    reports = (
        ('1', time_stats),
        ('2', station_stats),
        ('3', trip_duration_stats),
        ('4', user_stats),
    )
    for key, show_report in reports:
        if menu_option in (key, 'all'):
            show_report(df)
    return False
def main():
    """
    Run the interactive bikeshare session.

    Each round asks for the filters, loads the data, optionally pages
    through the raw rows five at a time, and then offers the statistics
    menu. The loop ends when the user answers anything other than "yes" to
    the restart question.
    """
    raw_data_msg = '\nWould you like to see the raw data? Enter yes or no.\n'
    page_size = 5
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        if df is not None and df.size > 0:
            start = 0
            while input(raw_data_msg).strip().lower() != 'no':
                # Positional slicing: after filtering, the index labels are no
                # longer consecutive, and label slices include both ends.
                print(df.iloc[start:start + page_size])
                start += page_size
                if start >= len(df):
                    print('No more raw data to display.')
                    break
            # A truthy result means the user asked to restart from the menu,
            # so begin a new round without asking the restart question.
            if choose_stats(df):
                continue
        else:
            print('No data was loaded')

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break
# Start the interactive session only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
initial commit