Last active
July 3, 2016 12:06
-
-
Save AlessandraSozzi/a8d6266b97e4da346a91e4955f450833 to your computer and use it in GitHub Desktop.
Google Custom Search API - Get Next pages up to 100 results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from search import SearchWithGoogle | |
import time | |
import pandas as pd | |
import config | |
# Run the project-local search helper for the "5Ps" query. The result is kept
# in `gr` but is not used by the Custom Search pagination further down.
gs = SearchWithGoogle("5Ps People, Planet, Partnership, Peace, and Prosperity")
gr = gs.search()

# Search-engine id and API key are read from the local `config` module.
cx = config.Google['cx']
# NOTE(review): `build` is not imported in the visible part of this file;
# presumably `from googleapiclient.discovery import build` is intended -- confirm.
google = build('customsearch', 'v1', developerKey=config.Google['APIkey'])

query = "test"
all_res = []  # accumulates the result items of every page fetched

# First Call
res = google.cse().list(q=query, cx=cx, num=10).execute()
# A response for a query without matches carries no 'items' key, so fall back
# to an empty list instead of raising KeyError.
all_res.extend(res.get('items', []))
def call_next(previous_response):
    """Fetch the page of results that follows ``previous_response``.

    The start index is read from
    ``previous_response['queries']['nextPage'][0]['startIndex']``; the request
    reuses the module-level ``google`` service, ``query`` and ``cx`` and asks
    for 10 results.

    Returns the executed response of that request. A ``KeyError`` propagates
    when ``previous_response`` has no ``'nextPage'`` entry.
    """
    search = google.cse()
    start_index = previous_response['queries']['nextPage'][0]['startIndex']
    request = search.list(q=query, cx=cx, num=10, start=start_index)
    return request.execute()
# Call following pages
# The Custom Search JSON API serves at most 100 results per query, i.e. with
# pages of 10 the last page that can be requested starts at index 91. The
# original upper bound of 260 iterations is kept only as a safety cap; the loop
# now stops as soon as there is nothing further to request.
prev = res
for _ in range(260):
    next_page = prev.get('queries', {}).get('nextPage')
    if not next_page:
        # No further page advertised by the previous response.
        break
    if next_page[0]['startIndex'] > 91:
        # Requesting past the 100-result cap would be rejected by the API.
        break
    next_res = call_next(prev)
    # A page without matches has no 'items' key.
    all_res.extend(next_res.get('items', []))
    time.sleep(5)  # pause between consecutive requests
    prev = next_res
# Create df with all the results
# Collect every row first and build the frame in a single call: appending with
# df.loc[len(df)] = ... grows the frame on each insertion, which is quadratic
# in the number of results. `rank` is the zero-based position of the item in
# all_res; a missing 'snippet', 'title' or 'link' keeps the original `{}`
# placeholder.
rows = [
    [rank, r.get('snippet', {}), r.get('title', {}), r.get('link', {})]
    for rank, r in enumerate(all_res)
]
df = pd.DataFrame(rows, columns=['rank', 'snippet', 'title', 'url'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment