Skip to content

Instantly share code, notes, and snippets.

@omendezmorales
Created February 13, 2021 16:06
Show Gist options
  • Save omendezmorales/1e6e0a2f3ae646a58492c0f541c3ed80 to your computer and use it in GitHub Desktop.
Save omendezmorales/1e6e0a2f3ae646a58492c0f541c3ed80 to your computer and use it in GitHub Desktop.
Project 2 Udacity
import os
import time
import numpy as np
import pandas as pd
# Maps each supported city name (lower case) to the CSV file read for it.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Month names in calendar order: a month's number is its list position plus one.
months = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
]
def _ask_choice(prompt, valid_choices, retry_prompt=None):
    """Prompt until the user enters one of valid_choices and return that choice.

    The answer is stripped of surrounding whitespace and lower-cased before it
    is compared, and the normalized form is what gets returned.
    """
    answer = input(prompt).strip().lower()
    while answer not in valid_choices:
        answer = input(retry_prompt or prompt).strip().lower()
    return answer


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Input is matched case-insensitively and every returned value is lower
    case, so callers can compare or look the values up without normalizing
    them again.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    mo_msg = 'enter name of the month to filter by (january, february, ... , june), or "all" to apply no month filter: '
    da_msg = 'enter name of the day of week to filter by( monday, tuesday, ... sunday), or "all" to apply no day filter: '

    valid_cities = ('chicago', 'new york city', 'washington')
    valid_months = ('all', 'january', 'february', 'march', 'april', 'may', 'june')
    valid_days = ('all', 'monday', 'tuesday', 'wednesday', 'thursday',
                  'friday', 'saturday', 'sunday')

    # Each question is repeated until the answer is one of the accepted values.
    city = _ask_choice('Please enter the city you want to explore data from: ',
                       valid_cities,
                       'please enter the city you want to explore data from: ')
    month = _ask_choice(mo_msg, valid_months)
    day = _ask_choice(da_msg, valid_days)

    print('using city: {}, month: {}, and day: {}\n'.format(city, month, day))
    print('-'*40)
    return city, month, day
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The city, month and day arguments are matched case-insensitively.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with added 'Start_Time', 'month', 'day_of_week' and 'start_hr'
             columns; None if the data could not be loaded (an error message
             is printed in that case)
    """
    print('\nLoading data...\n')
    start_time = time.time()
    try:
        # Resolve the CSV next to this script. abspath() keeps this correct
        # even when __file__ is a bare relative file name, where dirname()
        # alone would return an empty string.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(base_dir, CITY_DATA[city.lower()])
        df = pd.read_csv(file_path)

        # convert the Start Time column to datetime
        df['Start_Time'] = pd.to_datetime(df['Start Time'])

        # extract month, day of week and hour from Start Time to create new columns
        df['month'] = df['Start_Time'].dt.month
        df['day_of_week'] = df['Start_Time'].dt.day_name()
        df['start_hr'] = df['Start_Time'].dt.hour

        # filter by month if applicable
        month = month.lower()
        if month != 'all':
            # use the index of the months list to get the corresponding int
            mo = months.index(month) + 1
            df = df[df['month'] == mo]

        # filter by day of week if applicable
        day = day.lower()
        if day != 'all':
            # day_name() yields title-cased names such as "Monday"
            df = df[df['day_of_week'] == day.title()]
    except (OSError, KeyError, ValueError) as e:
        # Missing/unreadable file, unknown city or column, unknown month name,
        # or unparsable CSV/date content.
        print("Error occurred: {}".format(e))
        return None

    print("\nLoad took %s seconds." % (round(time.time() - start_time,3)))
    print('-'*40)
    return df
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # (message template, column to summarize, optional conversion for display)
    reports = (
        # the month column holds numbers; show the matching name from months
        ('Most common month: {}', 'month', lambda number: months[number - 1]),
        ('Most common day: {}', 'day_of_week', None),
        ('Most common start hr: {}', 'start_hr', None),
    )
    for template, column, convert in reports:
        # take the first value that mode() reports for the column
        top_value = df[column].mode()[0]
        if convert is not None:
            top_value = convert(top_value)
        print(template.format(top_value))

    elapsed = round(time.time() - began, 3)
    print("\nCalculation took %s seconds." % (elapsed))
    print('-' * 40)
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and the
    most frequent (start, end) pair with its number of trips. The DataFrame
    passed in is only read, never modified.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    most_pop_start_st = df['Start Station'].mode()[0]
    print('Most commonly used start station: "{}"'.format(most_pop_start_st))

    # display most commonly used end station
    most_pop_end_st = df['End Station'].mode()[0]
    print('Most commonly used end station: "{}"'.format(most_pop_end_st))

    # display most frequent combination of start station and end station trip.
    # Trips are counted per (start, end) pair instead of joining both names
    # into one string and splitting it again: station names may contain '-'
    # themselves, and grouping leaves the caller's DataFrame untouched.
    trip_counts = df.groupby(['Start Station', 'End Station']).size()
    start_st, end_st = trip_counts.idxmax()
    number_trips = int(trip_counts.max())
    print('Most frequent start/end station combination is "{}" - "{}" with {} trips'
          .format(start_st, end_st, number_trips))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time,3)))
    print('-'*40)
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    'Trip Duration' values are divided by 60 and truncated to whole minutes
    for display. When the column holds no values at all, a short notice is
    printed instead of the statistics.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration'].dropna()
    if durations.empty:
        # mean() and max() of nothing are NaN, which int() cannot convert
        print('No trip duration data available.')
    else:
        # display total travel time
        total_travel_minutes = int(durations.sum() / 60)
        print('Total travel time is {:,} minutes.'.format(total_travel_minutes))

        # display mean travel time
        mean_travel_minutes = int(durations.mean() / 60)
        print('Mean travel time is {:,} minutes.'.format(mean_travel_minutes))

        # display longest travel time
        longest_trip = int(durations.max() / 60)
        print('Longest travel time is {:,} minutes.'.format(longest_trip))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time,3)))
    print('-'*40)
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints birth-year statistics (when a 'Birth Year' column exists), counts
    per user type, and counts per gender (when a 'Gender' column exists).

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Birth Year' and 'Gender' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    if 'Birth Year' in df:
        # getting rid of users "born" in XIX century (and of missing years).
        # The filter is applied to the birth-year values only, so the user
        # type and gender counts below still cover every row.
        birth_years = df['Birth Year']
        birth_years = birth_years[birth_years.gt(1900)]
        if birth_years.empty:
            print('\nNo valid birth year data available.\n')
        else:
            youngest_born = int(birth_years.max())
            oldest_born = int(birth_years.min())
            most_common_year = int(birth_years.mode()[0])
            # Display earliest, most recent, and most common year of birth
            print('\nThe youngest biker was born in {},\n'
                  'the oldest biker was born in {},\n'
                  'and the most common year of birth is {}.\n'
                  .format(youngest_born, oldest_born, most_common_year))

    # Display counts of user types
    print('Counts per user types:\n {}'
          .format(df['User Type'].value_counts(sort=True)))

    # Display counts of gender, provided there's information in the dataframe
    if 'Gender' in df:
        print('\nCounts per gender:\n {}'
              .format(df['Gender'].value_counts(sort=True)))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time,3)))
    print('-'*40)
def choose_stats(df):
    """Give the user the option to choose one stat type or all

    Prints the menu, asks until a valid option is entered, then runs the
    selected report(s) on df. Option 0 runs no report and simply returns, so
    control goes back to the caller, where main() asks whether to restart.

    Args:
        df - Pandas DataFrame handed to the selected statistics function(s)
    """
    print('To see time statistics, press 1')
    print('To see station statistics, press 2')
    print('To see trip duration statistics, press 3')
    print('To see user statistics, press 4')
    print('To restart, press 0')

    # menu option -> report functions to run, in order
    reports = {
        '0': (),
        '1': (time_stats,),
        '2': (station_stats,),
        '3': (trip_duration_stats,),
        '4': (user_stats,),
        'all': (time_stats, station_stats, trip_duration_stats, user_stats),
    }

    prompt = 'Choose the statistics you want to see, or "all": '
    menu_option = input(prompt).strip().lower()
    while menu_option not in reports:
        menu_option = input(prompt).strip().lower()

    # Restart is left to the caller's loop rather than calling main() from
    # here, which would start a second session nested inside the current one.
    for show_report in reports[menu_option]:
        show_report(df)
def _show_raw_data(df, prompt, page_size=5):
    """Print df page_size rows at a time for as long as the user does not answer "no"."""
    position = 0
    answer = input(prompt)
    while answer.strip().lower() != 'no':
        # Positional slicing: after filtering, the index labels are no longer
        # consecutive, so label-based slicing would give ragged or empty pages.
        print(df.iloc[position:position + page_size])
        position += page_size
        if position >= len(df):
            print('\nNo more raw data to display.')
            break
        answer = input(prompt)


def main():
    """Run the interactive session: pick filters, load data, browse it, show stats.

    The whole cycle repeats until the user answers anything other than "yes"
    to the restart question.
    """
    raw_data_msg = '\nWould you like to see the raw data? Enter yes or no.\n'
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        # load_data returns None on failure; filters may also leave no rows
        if df is not None and not df.empty:
            _show_raw_data(df, raw_data_msg)
            choose_stats(df)
        else:
            print('No data was loaded')

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break
# Start the interactive session only when this file is run as a script.
if "__main__" == __name__:
    main()
@omendezmorales
Copy link
Author

initial commit

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment