Skip to content

Instantly share code, notes, and snippets.

@monkeycycle
Created July 3, 2018 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save monkeycycle/d7c32077f820ce07004bad5872c7d207 to your computer and use it in GitHub Desktop.
Save monkeycycle/d7c32077f820ce07004bad5872c7d207 to your computer and use it in GitHub Desktop.
Sunrise Sunset Scraper
import csv
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup
# Output CSV path and the pieces used to assemble each monthly page URL.
file_out = 'sunrise_sunset_toronto.csv'
# Directory part of the URL only; city, month and year are appended per page.
base_url = 'https://www.timeanddate.com/sun/canada/'
base_city = 'toronto'
# NOTE(review): never read -- the year range passed to _build_pages() drives
# the scrape. Kept so the module-level name still exists.
base_year = '2017'
# id attribute of the <table> that holds the monthly sun data.
root_el_id = 'as-monthsun'

# Column names of the output CSV (spelling kept as-is: downstream consumers
# may already key on these exact strings).
csv_header = ["date", "month_day", "sunrise", "sunset", "twlight_astro_start", "twlight_astro_end", "twlight_naut_start", "twlight_naut_end", "twlight_civil_start", "twlight_civil_end"]


def _build_pages(first_year=2007, last_year=2016):
    """Return one ``[month, year, url]`` entry per month to scrape.

    ``month`` is a zero-padded two-character string, ``year`` a string, and
    ``url`` the page address for ``base_city`` in that month. Both year
    bounds are inclusive; the defaults cover 2007 through 2016.
    """
    return [
        [f"{month:02d}", str(year), f"{base_url}{base_city}?month={month}&year={year}"]
        for year in range(first_year, last_year + 1)
        for month in range(1, 13)
    ]


def _parse_row(row, month, year):
    """Turn one ``<tr>`` of the sun table into a CSV record.

    Returns the list of values in ``csv_header`` order, or ``None`` when the
    row is not a day row (its first cell is not an integer, or it has too
    few cells to hold the twilight columns).
    """
    cells = row.contents
    if len(cells) <= 10:
        return None
    try:
        day = int(cells[0].text.strip())
    except ValueError:
        # Not a day-of-month row; skip it instead of aborting the scrape.
        return None

    month_day = f"{day:02d}"
    date = f"{month}-{month_day}-{year}"  # MM-DD-YYYY
    # Keep only the clock time that precedes the " am " / " pm " marker.
    sunrise = cells[1].text.strip().partition(" am ")[0]
    sunset = cells[2].text.strip().partition(" pm ")[0]
    # Cells 5-10: astronomical, nautical and civil twilight start/end.
    twilight = [cells[index].text.strip() for index in range(5, 11)]
    return [date, month_day, sunrise, sunset, *twilight]


def main():
    """Fetch every page in ``pages`` and write its day rows to ``file_out``."""
    # newline='' is what the csv module expects; the with-block guarantees the
    # file is flushed and closed even if a request fails part-way through.
    with open(file_out, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(csv_header)
        for month, year, url in pages:
            page = requests.get(url, timeout=30)
            soup = BeautifulSoup(page.text, 'html.parser')
            # Sunrise sunset data is in the #as-monthsun table
            table_sun = soup.find("table", {"id": root_el_id})
            table_sun_body = table_sun.find("tbody") if table_sun is not None else None
            if table_sun_body is None:
                print(f"No #{root_el_id} table body found at {url}; skipping")
            else:
                for table_sun_row in table_sun_body.find_all('tr'):
                    record = _parse_row(table_sun_row, month, year)
                    if record is not None:
                        writer.writerow(record)
            # Pause between page requests to avoid hammering the site.
            time.sleep(2)


# Assemble list of pages to scrape
pages = _build_pages()

if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment