omendezmorales · February 13, 2021 16:06 · omendezmorales · Feb 13, 2021
diff --git a/bikeshare.py b/bikeshare.py
 import os
 import time

 import numpy as np
 import pandas as pd

 # Maps each supported city name (lowercase) to the CSV file with its trip data.
 CITY_DATA = {
     'chicago': 'chicago.csv',
     'new york city': 'new_york_city.csv',
     'washington': 'washington.csv',
 }

 # Month names accepted as filters; list position + 1 is the calendar month number.
 months = [
     'january',
     'february',
     'march',
     'april',
     'may',
     'june',
 ]

 def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lowercased before
    it is validated, and that normalized form is what gets returned, so the
    results can be used directly as lookup keys.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    cities = ('chicago', 'new york city', 'washington')
    month_choices = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
    day_choices = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                   'saturday', 'sunday', 'all')

    def ask(prompt, valid, retry_prompt=None):
        """Keep prompting until the normalized answer is one of `valid`; return it."""
        answer = input(prompt).strip().lower()
        while answer not in valid:
            answer = input(retry_prompt or prompt).strip().lower()
        return answer

    print('Hello! Let\'s explore some US bikeshare data!')
    mo_msg = 'enter name of the month to filter by (january, february, ... , june), or "all" to apply no month filter: '
    da_msg = 'enter name of the day of week to filter by( monday, tuesday, ... sunday), or "all" to apply no day filter: '

    # get user input for city (chicago, new york city, washington)
    city = ask('Please enter the city you want to explore data from: ', cities,
               'please enter the city you want to explore data from: ')

    # get user input for month (all, january, february, ... , june)
    month = ask(mo_msg, month_choices)

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = ask(da_msg, day_choices)

    print('using city: {}, month: {}, and day: {}\n'.format(city, month, day))
    print('-'*40)

    return city, month, day


 def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The three arguments are matched case-insensitively and with surrounding
    whitespace ignored.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             or None when loading failed (the error is printed)
    """
    try:
        print('\nLoading data...\n')
        start_time = time.time()

        # normalize once so the lookups below do not depend on the caller's casing
        city = city.strip().lower()
        month = month.strip().lower()
        day = day.strip().lower()

        # the CSV lives next to this script; abspath matters because a bare
        # relative __file__ has an empty dirname, which would turn into '/<name>'
        base_dir = os.path.dirname(os.path.abspath(__file__))
        df = pd.read_csv(os.path.join(base_dir, CITY_DATA[city]))

        # convert the Start Time column to datetime
        df['Start_Time'] = pd.to_datetime(df['Start Time'])

        # extract month, day of week and hour with the vectorized datetime accessor
        start_parts = df['Start_Time'].dt
        df['month'] = start_parts.month
        df['day_of_week'] = start_parts.day_name()
        df['start_hr'] = start_parts.hour

        # filter by month if applicable
        if month != 'all':
            # position in the months list + 1 is the calendar month number
            df = df[df['month'] == months.index(month) + 1]

        # filter by day of week if applicable
        if day != 'all':
            # day_name() yields capitalized names such as 'Monday'
            df = df[df['day_of_week'] == day.title()]

        print("\nLoad took %s seconds." % (round(time.time() - start_time, 3)))
        print('-'*40)
        return df

    except Exception as e:
        # best effort by design: report the problem and signal failure with None
        print("Error occurred: {}".format(e))
        return None

    

 def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # most common month: stored as a 1-based number, looked up in the 0-based months list
    top_month_number = df['month'].mode()[0]
    top_month_name = months[top_month_number - 1]
    print('Most common month: {}'.format(top_month_name))

    # most common day of week, then most common start hour
    for template, column in (('Most common day: {}', 'day_of_week'),
                             ('Most common start hr: {}', 'start_hr')):
        print(template.format(df[column].mode()[0]))

    elapsed = round(time.time() - began, 3)
    print("\nCalculation took %s seconds." % elapsed)
    print('-'*40)


 def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station, and the
    most frequent (start, end) pair together with its number of trips.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns;
             it is only read, never modified
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    most_pop_start_st = df['Start Station'].mode()[0]
    print('Most commonly used start station: "{}"'.format(most_pop_start_st))

    # display most commonly used end station
    most_pop_end_st = df['End Station'].mode()[0]
    print('Most commonly used end station: "{}"'.format(most_pop_end_st))

    # count trips per (start, end) pair; grouping on both columns keeps the two
    # names separate, so station names containing '-' are reported intact and
    # no helper column is written into the caller's DataFrame
    trips_per_route = df.groupby(['Start Station', 'End Station']).size()
    if trips_per_route.empty:
        print('No complete start/end station pairs available.')
    else:
        route_start, route_end = trips_per_route.idxmax()
        print('Most frequent start/end station combination is "{}" - "{}" with {} trips'
              .format(route_start, route_end, trips_per_route.max()))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)


 def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # total, mean and longest travel time: each aggregate is divided by 60 and
    # truncated to a whole number before it is printed
    for template, aggregate in (
            ('Total travel time is {:,} minutes.', durations.sum),
            ('Mean travel time is {:,} minutes.', durations.mean),
            ('Longest travel time is {:,} minutes.', durations.max)):
        print(template.format(int(aggregate() / 60)))

    elapsed = round(time.time() - began, 3)
    print("\nCalculation took %s seconds." % elapsed)
    print('-'*40)


 def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Prints birth-year statistics when a 'Birth Year' column exists, the counts
    per user type, and the counts per gender when a 'Gender' column exists.
    Birth years of 1900 or earlier are ignored for the birth-year statistics
    only; the user-type and gender counts always cover every row of df.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Birth Year' and 'Gender' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    if 'Birth Year' in df:
        # getting rid of users "born" in XIX century; the filtered years live in
        # their own variable so the counts below are not narrowed by this filter
        birth_years = df['Birth Year']
        birth_years = birth_years[birth_years.gt(1900)]
        if birth_years.empty:
            print('\nNo valid birth year information available.\n')
        else:
            youngest = int(birth_years.max())
            oldest = int(birth_years.min())
            most_common_year = int(birth_years.mode()[0])
            # Display earliest, most recent, and most common year of birth
            print('\nThe youngest biker was born in {},\n'
                  'the oldest biker was born in {},\n'
                  'and the most common year of birth is {}.\n'
                  .format(youngest, oldest, most_common_year))

    # Display counts of user types
    print('Counts per user types:\n {}'
          .format(df['User Type'].value_counts(sort=True)))

    # Display counts of gender, provided there's information in the dataframe
    if 'Gender' in df:
        print('\nCounts per gender:\n {}'
              .format(df['Gender'].value_counts(sort=True)))

    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
    print('-'*40)

 def choose_stats(df):
    """
    Give the user the option to choose one stat type or all.

    Args:
        df - Pandas DataFrame handed to the selected statistics function(s)
    Returns:
        (bool) True when the user pressed 0 to ask for a restart, False otherwise
    """
    print('To see time statistics, press 1')
    print('To see station statistics, press 2')
    print('To see trip duration statistics, press 3')
    print('To see user statistics, press 4')
    print('To restart, press 0')

    prompt = 'Choose the statistics you want to see, or "all": '
    valid_options = ('all', '0', '1', '2', '3', '4')
    menu_option = input(prompt).strip().lower()
    while menu_option not in valid_options:
        menu_option = input(prompt).strip().lower()

    if menu_option == '0':
        # report the request to the caller instead of calling main() from here:
        # a nested main() would resume the outer session once the inner one
        # ended and ask the restart question a second time
        return True

    if menu_option == 'all':
        selected = (time_stats, station_stats, trip_duration_stats, user_stats)
    else:
        selected = {
            '1': (time_stats,),
            '2': (station_stats,),
            '3': (trip_duration_stats,),
            '4': (user_stats,),
        }[menu_option]

    for show in selected:
        show(df)
    return False

 def main():
    """
    Run the interactive bikeshare session.

    Each pass asks for the filters, loads the matching data, optionally pages
    through the raw rows five at a time, and shows the chosen statistics. The
    loop repeats for as long as the user asks to restart.
    """
    raw_data_msg = '\nWould you like to see the raw data? Enter yes or no.\n'
    page_size = 5
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        if df is not None and not df.empty:
            # page by position: after filtering, the index labels are no longer
            # consecutive, so label-based slicing would skip or repeat rows;
            # stop offering more once every row has been shown
            start = 0
            while start < len(df) and input(raw_data_msg).strip().lower() != 'no':
                print(df.iloc[start:start + page_size])
                start += page_size

            # a truthy result means the user chose "restart" from the menu
            if choose_stats(df):
                continue
        else:
            print('No data was loaded')
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break


 # Start the interactive session only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	import os
	import time

	import numpy as np
	import pandas as pd

	# Maps each supported city name (lowercase) to the CSV file with its trip data.
	CITY_DATA = {
	    'chicago': 'chicago.csv',
	    'new york city': 'new_york_city.csv',
	    'washington': 'washington.csv',
	}

	# Month names accepted as filters; list position + 1 is the calendar month number.
	months = [
	    'january',
	    'february',
	    'march',
	    'april',
	    'may',
	    'june',
	]

	def get_filters():
	    """
	    Asks user to specify a city, month, and day to analyze.

	    Every answer is stripped of surrounding whitespace and lowercased before
	    it is validated, and that normalized form is what gets returned, so the
	    results can be used directly as lookup keys.

	    Returns:
	        (str) city - name of the city to analyze
	        (str) month - name of the month to filter by, or "all" to apply no month filter
	        (str) day - name of the day of week to filter by, or "all" to apply no day filter
	    """
	    cities = ('chicago', 'new york city', 'washington')
	    month_choices = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
	    day_choices = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
	                   'saturday', 'sunday', 'all')

	    def ask(prompt, valid, retry_prompt=None):
	        """Keep prompting until the normalized answer is one of `valid`; return it."""
	        answer = input(prompt).strip().lower()
	        while answer not in valid:
	            answer = input(retry_prompt or prompt).strip().lower()
	        return answer

	    print('Hello! Let\'s explore some US bikeshare data!')
	    mo_msg = 'enter name of the month to filter by (january, february, ... , june), or "all" to apply no month filter: '
	    da_msg = 'enter name of the day of week to filter by( monday, tuesday, ... sunday), or "all" to apply no day filter: '

	    # get user input for city (chicago, new york city, washington)
	    city = ask('Please enter the city you want to explore data from: ', cities,
	               'please enter the city you want to explore data from: ')

	    # get user input for month (all, january, february, ... , june)
	    month = ask(mo_msg, month_choices)

	    # get user input for day of week (all, monday, tuesday, ... sunday)
	    day = ask(da_msg, day_choices)

	    print('using city: {}, month: {}, and day: {}\n'.format(city, month, day))
	    print('-'*40)

	    return city, month, day


	def load_data(city, month, day):
	    """
	    Loads data for the specified city and filters by month and day if applicable.

	    The three arguments are matched case-insensitively and with surrounding
	    whitespace ignored.

	    Args:
	        (str) city - name of the city to analyze
	        (str) month - name of the month to filter by, or "all" to apply no month filter
	        (str) day - name of the day of week to filter by, or "all" to apply no day filter
	    Returns:
	        df - Pandas DataFrame containing city data filtered by month and day,
	             or None when loading failed (the error is printed)
	    """
	    try:
	        print('\nLoading data...\n')
	        start_time = time.time()

	        # normalize once so the lookups below do not depend on the caller's casing
	        city = city.strip().lower()
	        month = month.strip().lower()
	        day = day.strip().lower()

	        # the CSV lives next to this script; abspath matters because a bare
	        # relative __file__ has an empty dirname, which would turn into '/<name>'
	        base_dir = os.path.dirname(os.path.abspath(__file__))
	        df = pd.read_csv(os.path.join(base_dir, CITY_DATA[city]))

	        # convert the Start Time column to datetime
	        df['Start_Time'] = pd.to_datetime(df['Start Time'])

	        # extract month, day of week and hour with the vectorized datetime accessor
	        start_parts = df['Start_Time'].dt
	        df['month'] = start_parts.month
	        df['day_of_week'] = start_parts.day_name()
	        df['start_hr'] = start_parts.hour

	        # filter by month if applicable
	        if month != 'all':
	            # position in the months list + 1 is the calendar month number
	            df = df[df['month'] == months.index(month) + 1]

	        # filter by day of week if applicable
	        if day != 'all':
	            # day_name() yields capitalized names such as 'Monday'
	            df = df[df['day_of_week'] == day.title()]

	        print("\nLoad took %s seconds." % (round(time.time() - start_time, 3)))
	        print('-'*40)
	        return df

	    except Exception as e:
	        # best effort by design: report the problem and signal failure with None
	        print("Error occurred: {}".format(e))
	        return None



	def time_stats(df):
	    """
	    Displays statistics on the most frequent times of travel.

	    Args:
	        df - Pandas DataFrame with 'month', 'day_of_week' and 'start_hr' columns
	    """

	    print('\nCalculating The Most Frequent Times of Travel...\n')
	    began = time.time()

	    # most common month: stored as a 1-based number, looked up in the 0-based months list
	    top_month_number = df['month'].mode()[0]
	    top_month_name = months[top_month_number - 1]
	    print('Most common month: {}'.format(top_month_name))

	    # most common day of week, then most common start hour
	    for template, column in (('Most common day: {}', 'day_of_week'),
	                             ('Most common start hr: {}', 'start_hr')):
	        print(template.format(df[column].mode()[0]))

	    elapsed = round(time.time() - began, 3)
	    print("\nCalculation took %s seconds." % elapsed)
	    print('-'*40)


	def station_stats(df):
	    """
	    Displays statistics on the most popular stations and trip.

	    Prints the most common start station, the most common end station, and the
	    most frequent (start, end) pair together with its number of trips.

	    Args:
	        df - Pandas DataFrame with 'Start Station' and 'End Station' columns;
	             it is only read, never modified
	    """

	    print('\nCalculating The Most Popular Stations and Trip...\n')
	    start_time = time.time()

	    # display most commonly used start station
	    most_pop_start_st = df['Start Station'].mode()[0]
	    print('Most commonly used start station: "{}"'.format(most_pop_start_st))

	    # display most commonly used end station
	    most_pop_end_st = df['End Station'].mode()[0]
	    print('Most commonly used end station: "{}"'.format(most_pop_end_st))

	    # count trips per (start, end) pair; grouping on both columns keeps the two
	    # names separate, so station names containing '-' are reported intact and
	    # no helper column is written into the caller's DataFrame
	    trips_per_route = df.groupby(['Start Station', 'End Station']).size()
	    if trips_per_route.empty:
	        print('No complete start/end station pairs available.')
	    else:
	        route_start, route_end = trips_per_route.idxmax()
	        print('Most frequent start/end station combination is "{}" - "{}" with {} trips'
	              .format(route_start, route_end, trips_per_route.max()))

	    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
	    print('-'*40)


	def trip_duration_stats(df):
	    """
	    Displays statistics on the total and average trip duration.

	    Args:
	        df - Pandas DataFrame with a numeric 'Trip Duration' column
	    """

	    print('\nCalculating Trip Duration...\n')
	    began = time.time()

	    durations = df['Trip Duration']

	    # total, mean and longest travel time: each aggregate is divided by 60 and
	    # truncated to a whole number before it is printed
	    for template, aggregate in (
	            ('Total travel time is {:,} minutes.', durations.sum),
	            ('Mean travel time is {:,} minutes.', durations.mean),
	            ('Longest travel time is {:,} minutes.', durations.max)):
	        print(template.format(int(aggregate() / 60)))

	    elapsed = round(time.time() - began, 3)
	    print("\nCalculation took %s seconds." % elapsed)
	    print('-'*40)


	def user_stats(df):
	    """
	    Displays statistics on bikeshare users.

	    Prints birth-year statistics when a 'Birth Year' column exists, the counts
	    per user type, and the counts per gender when a 'Gender' column exists.
	    Birth years of 1900 or earlier are ignored for the birth-year statistics
	    only; the user-type and gender counts always cover every row of df.

	    Args:
	        df - Pandas DataFrame with a 'User Type' column and, optionally,
	             'Birth Year' and 'Gender' columns
	    """

	    print('\nCalculating User Stats...\n')
	    start_time = time.time()

	    if 'Birth Year' in df:
	        # getting rid of users "born" in XIX century; the filtered years live in
	        # their own variable so the counts below are not narrowed by this filter
	        birth_years = df['Birth Year']
	        birth_years = birth_years[birth_years.gt(1900)]
	        if birth_years.empty:
	            print('\nNo valid birth year information available.\n')
	        else:
	            youngest = int(birth_years.max())
	            oldest = int(birth_years.min())
	            most_common_year = int(birth_years.mode()[0])
	            # Display earliest, most recent, and most common year of birth
	            print('\nThe youngest biker was born in {},\n'
	                  'the oldest biker was born in {},\n'
	                  'and the most common year of birth is {}.\n'
	                  .format(youngest, oldest, most_common_year))

	    # Display counts of user types
	    print('Counts per user types:\n {}'
	          .format(df['User Type'].value_counts(sort=True)))

	    # Display counts of gender, provided there's information in the dataframe
	    if 'Gender' in df:
	        print('\nCounts per gender:\n {}'
	              .format(df['Gender'].value_counts(sort=True)))

	    print("\nCalculation took %s seconds." % (round(time.time() - start_time, 3)))
	    print('-'*40)

	def choose_stats(df):
	    """
	    Give the user the option to choose one stat type or all.

	    Args:
	        df - Pandas DataFrame handed to the selected statistics function(s)
	    Returns:
	        (bool) True when the user pressed 0 to ask for a restart, False otherwise
	    """
	    print('To see time statistics, press 1')
	    print('To see station statistics, press 2')
	    print('To see trip duration statistics, press 3')
	    print('To see user statistics, press 4')
	    print('To restart, press 0')

	    prompt = 'Choose the statistics you want to see, or "all": '
	    valid_options = ('all', '0', '1', '2', '3', '4')
	    menu_option = input(prompt).strip().lower()
	    while menu_option not in valid_options:
	        menu_option = input(prompt).strip().lower()

	    if menu_option == '0':
	        # report the request to the caller instead of calling main() from here:
	        # a nested main() would resume the outer session once the inner one
	        # ended and ask the restart question a second time
	        return True

	    if menu_option == 'all':
	        selected = (time_stats, station_stats, trip_duration_stats, user_stats)
	    else:
	        selected = {
	            '1': (time_stats,),
	            '2': (station_stats,),
	            '3': (trip_duration_stats,),
	            '4': (user_stats,),
	        }[menu_option]

	    for show in selected:
	        show(df)
	    return False

	def main():
	    """
	    Run the interactive bikeshare session.

	    Each pass asks for the filters, loads the matching data, optionally pages
	    through the raw rows five at a time, and shows the chosen statistics. The
	    loop repeats for as long as the user asks to restart.
	    """
	    raw_data_msg = '\nWould you like to see the raw data? Enter yes or no.\n'
	    page_size = 5
	    while True:
	        city, month, day = get_filters()
	        df = load_data(city, month, day)
	        if df is not None and not df.empty:
	            # page by position: after filtering, the index labels are no longer
	            # consecutive, so label-based slicing would skip or repeat rows;
	            # stop offering more once every row has been shown
	            start = 0
	            while start < len(df) and input(raw_data_msg).strip().lower() != 'no':
	                print(df.iloc[start:start + page_size])
	                start += page_size

	            # a truthy result means the user chose "restart" from the menu
	            if choose_stats(df):
	                continue
	        else:
	            print('No data was loaded')
	        restart = input('\nWould you like to restart? Enter yes or no.\n')
	        if restart.strip().lower() != 'yes':
	            break


	# Start the interactive session only when this file is executed as a script.
	if __name__ == "__main__":
	    main()