Created
September 21, 2015 09:15
-
-
Save r4vi/2c121bdafdb6a063688a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from requests_futures.sessions import FuturesSession" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from bs4 import BeautifulSoup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from bs4 import BeautifulSoup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp = requests.get(BASE_URL + '1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"BASE_URL = 'http://events.londonopenhouse.org/Venues?q=&Page='" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"MAX_PAGES = 71" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp = requests.get(BASE_URL + '1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import lxml, cssselect, requests" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp = requests.get(BASE_URL + '1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"soup = BeautifulSoup(resp.content)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"soup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"soup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"soup.select('.listing-detail')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(soup.select('.listing-detail'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from requests_futures.sessions import FuturesSession" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"session = FuturesSession(max_workers=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = [x for x in range(MAX_PAGES)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = [x for x in range(MAX_PAGES)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = [x for x in range(MAX_PAGES+1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = [BASE_URL+str(x) for x in range(MAX_PAGES+1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = [BASE_URL+str(x) for x in range(MAX_PAGES+1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps = [session.get(x) for x in urls]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps_results = [x.result() for x in resps]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps_results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps_results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(resps_results)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps_listings = [BeautifulSoup(x.content).select('.listing-detail') for x in resps_results]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(resps_listings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resps_listings[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import itertools" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details = list(itertools.chain.from_iterable(resps_listings))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details = list(itertools.chain.from_iterable(resps_listings))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(listing_details)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d1 = listing_details[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"type(d1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d1.select('.address')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d1.select_one('address')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d1.select_one('address').text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'address': d1.select_one('address').text }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'address': d1.select_one('address').text, 'name': d1.select_one('a').text }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'address': d1.select_one('address').text, 'name': d1.select_one('a').text, 'detail': d1.select_one('a')['href'] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'address': d1.select_one('address').text, 'name': d1.select_one('a').text, 'detail': d1.select_one('a')['href'] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def tag_to_dict(d1):\n", | |
" return {'address': d1.select_one('address').text, 'name': d1.select_one('a').text, 'detail': d1.select_one('a')['href'] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts = [tag_to_dict(x) for x in listing_details]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"BASE_URL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"BASE_DETAL_URL = 'http://events.londonopenhouse.org'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" x['detail'] = BASE_DETAL_URL + x['detail']\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" x['detail_content'] = requests.get(x['detail'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"c1 = listing_details_dicts[0]['detail_content']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"c1 = listing_details_dicts[0]['detail_content']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"c1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs = BeautifulSoup(c1.content)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs.select('#body')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs.select_one('#body').select_one('div[data-body]')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs.select_one('#body').select_one('div[data-body]')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs.select_one('#body').select_one('div[data-body]').text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs.select_one('#body').select_one('div[data-body]').text.strip()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': bs.select_one('#body').select_one('div[data-body]').text.strip() }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': bs.select_one('#body').select_one('div[data-body]').text.strip(), }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': bs.select_one('#body').select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': ''}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body = bs.select_one('#body')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': body.select('a[href^=/venues]') }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': body.select('a[href^=/venues/accessTimeIds]') }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': body.select('a[href^=/venues?accessTimeIds]') }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.text for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.text.strip() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.text.strip() for x in body.select('table tr')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.text.strip() for x in body.select('table tr')[0]] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': body.select('table tr') }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 80, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent().text.strip() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent(1) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent().parent() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent() for x in body.select('a[href^=/venues?accessTimeIds]').parent()] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.getparent() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [type(x) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent.text for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent.text.strip() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent.text.strip() for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent.text.strip().split('|') for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [y.strip() for y in [x.parent.text.strip().split('|') for x in body.select('a[href^=/venues?accessTimeIds]')]] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 94, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [y.strip() for y in [x.parent.text.strip().split('|') for x in body.select('a[href^=/venues?accessTimeIds]')]] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [x.parent.text.strip().split('|') for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 96, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [[y.strip() for y in x.parent.text.strip().split('|')] for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [','.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"{'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resp_to_detail(resp):\n", | |
" bs = BeautifulSoup(resp.content)\n", | |
" body = bs.select_one('#body')\n", | |
" return {'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 100, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 101, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resp_to_detail(resp):\n", | |
" bs = BeautifulSoup(resp.content)\n", | |
" body = bs.select_one('#body')\n", | |
" return {'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')] }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.text.strip()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:list')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 118, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:last')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(1)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)').text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 122, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)').text.strip()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resp_to_detail(resp):\n", | |
" bs = BeautifulSoup(resp.content)\n", | |
" body = bs.select_one('#body')\n", | |
" return {'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')], \n", | |
" 'last_entry': body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)').text.strip()\n", | |
"} " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resp_to_detail(resp):\n", | |
" bs = BeautifulSoup(resp.content)\n", | |
" body = bs.select_one('#body')\n", | |
" return {'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')], \n", | |
" 'last_entry': body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)').text.strip()\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 125, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[0]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 126, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" d2 = resp_to_detail(x['detail_content'])\n", | |
" x.update(d2)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 128, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" print(resp_to_detail(x['detail_content']))\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 129, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" print(resp_to_detail(x['detail_content']))\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 130, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[1]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 131, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" d2 = resp_to_detail(x['detail_content'])\n", | |
" x.update(d2)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 132, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import simplekml" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 133, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import geopy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 134, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resp_to_detail(resp):\n", | |
" bs = BeautifulSoup(resp.content)\n", | |
" body = bs.select_one('#body')\n", | |
" return {'description': body.select_one('div[data-body]').text.strip(),\n", | |
" 'open_times': [', '.join([y.strip() for y in x.parent.text.strip().split('|')]) for x in body.select('a[href^=/venues?accessTimeIds]')], \n", | |
" #'last_entry': body.select_one('label[for=Venue_LastEntryTime]').parent.parent.select_one('td:nth-of-type(2)').text.strip()\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 135, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp_to_detail(listing_details_dicts[1]['detail_content'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 136, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" d2 = resp_to_detail(x['detail_content'])\n", | |
" x.update(d2)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 137, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 138, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from geopy.geocoders import Nominatim" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 139, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geolocator = Nominatim()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 140, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geolocator.geocode(listing_details_dicts[0]['address'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 141, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geolocator.geocode(listing_details_dicts[0]['address'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 142, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 143, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geolocator = Nominatim(timeout=5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 144, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 145, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 146, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 147, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 148, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x for x in listing_details_dicts if 'loc' not in x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 149, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"[x for x in listing_details_dicts if 'loc' not in x]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 150, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len([x for x in listing_details_dicts if 'loc' not in x])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 151, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len([x for x in listing_details_dicts if 'loc' in x])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 152, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 153, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: print x['address']; x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 154, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: print(x['address']); x['loc'] = geolocator.geocode(x['address'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 155, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: print(x['address']); try: x['loc'] = geolocator.geocode(x['address']); except: pass;\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 156, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' not in x: print(x['address'])\n", | |
" try:\n", | |
" x['loc'] = geolocator.geocode(x['address'])\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len([x for x in listing_details_dicts if 'loc' in x])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 158, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 159, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0].loc" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]['loc']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 161, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]['loc'].point" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dir(listing_details_dicts[0]['loc'].point)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]['loc'].point" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]['loc'].point[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]['loc'].point[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 166, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 167, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.newpoint?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 168, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.newpoint??" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 169, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.newpoint???" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 170, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.newpoint??" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 171, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc):\n", | |
" doc\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 172, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" doc.newpoint(name=item['name'])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 174, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + '\\n' + item['detail'] + '\\n'.join(item['open_times']) )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 175, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + '\\n' + item['detail'] + '\\n'.join(item['open_times']), coords=item['loc'].point )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 176, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"make_point(kml, listing_details_dicts[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 177, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(listing_details_dicts[0]['loc'].point)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 178, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + '\\n' + item['detail'] + '\\n'.join(item['open_times']), coords=item['loc'].point.long )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 179, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p = listing_details_dicts[0]['loc'].point" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p.longitude" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 181, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p.latitude" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 182, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + '\\n' + item['detail'] + '\\n'.join(item['open_times']), coords=(item['loc'].point.longitude, item['loc'].point.latitude ,) )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 183, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"make_point(kml, listing_details_dicts[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 184, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + '\\n' + item['detail'] + '\\n'.join(item['open_times']), coords=[(item['loc'].point.longitude, item['loc'].point.latitude ,)] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 185, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 186, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 187, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"make_point(kml, listing_details_dicts[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 188, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"make_point(kml, listing_details_dicts[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 189, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 190, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 191, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p.longitude" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 192, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 193, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 194, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" make_point(kml, x)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 195, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" make_point(kml, x)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 196, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 197, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 198, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 199, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 200, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 201, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 202, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
" make_point(kml, x)\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 203, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 204, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.save('/tmp/openhouse2015.kml')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 205, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + ' \\n ' + item['detail'] + ' \\n'.join(item['open_times']), coords=[(item['loc'].point.longitude, item['loc'].point.latitude ,)] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 206, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 207, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 208, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
" make_point(kml, x)\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 209, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.save('/tmp/openhouse2015.kml')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 210, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + ' \\n'.join(item['open_times'] + ' \\n ' + item['detail']), coords=[(item['loc'].point.longitude, item['loc'].point.latitude ,)] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 211, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 212, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
" make_point(kml, x)\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 213, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.save('/tmp/openhouse2015.kml')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 214, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_point(doc, item):\n", | |
" return doc.newpoint(name=item['name'], description=item['description'] + ' \\n'.join(item['open_times']) + ' \\n ' + item['detail'], coords=[(item['loc'].point.longitude, item['loc'].point.latitude ,)] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 215, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 216, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
" make_point(kml, x)\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 217, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.save('/tmp/openhouse2015.kml')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 218, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sunday_listings = []" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 219, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"listing_details_dicts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 220, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'Sunday' in listing_details_dicts[0]['opening_times']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 221, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'Sunday' in listing_details_dicts[0]['open_times']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 222, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'Sunday' in ' '.join(listing_details_dicts[0]['open_times'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 223, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'Sunday' in ' '.join(listing_details_dicts[0]['open_times']).lower()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 224, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'sunday' in ' '.join(listing_details_dicts[0]['open_times']).lower()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 225, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sunday_listings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 226, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" print('x')\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 227, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in listing_details_dicts:\n", | |
" if 'sunday' in ' '.join(x['open_times']).lower():\n", | |
" sunday_listings.append(x)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 228, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml = simplekml.Kml()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 229, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for x in sunday_listings:\n", | |
" if 'loc' in x:\n", | |
" try:\n", | |
" make_point(kml, x)\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 230, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"kml.save('/tmp/open_sunday.kml')" | |
] | |
} | |
], | |
"metadata": {}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment