Skip to content

Instantly share code, notes, and snippets.

@sjockers
Last active April 17, 2018 10:42
Show Gist options
  • Save sjockers/071499234f4ede6fa508efc18c246757 to your computer and use it in GitHub Desktop.
Save sjockers/071499234f4ede6fa508efc18c246757 to your computer and use it in GitHub Desktop.
Scrape the 2013 Bundestag election results for individual constituencies from Wikipedia using artoo.js
// Scrape the 2013 Bundestag election results for individual constituencies from Wikipedia
// - using artoo.js v0.3.3.1 (http://medialab.github.io/artoo/)
// - starting point: https://de.wikipedia.org/wiki/Liste_der_Bundestagswahlkreise_2013
// Accumulator for the scraped records; the spider's process callback
// replaces it with a longer array after every page.
var result = []

// Origin prepended to every href read from the page.
var baseUrl = 'https://de.wikipedia.org'

// Absolute URLs built from the links found in the 2nd and 6th <td> of the
// matched .wikitable on the starting page.
var pages = artoo.scrape(
  '.wikitable:nth-of-type(2) td:nth-of-type(2) a,' +
    '.wikitable:nth-of-type(2) td:nth-of-type(6) a',
  function () {
    var href = $(this).attr('href')
    return baseUrl + href
  }
)
/**
 * Locates the rows of the 2013 election results table in a fetched page.
 *
 * Starting from every h2/h3 whose text contains "Bundestagswahl 2013", it
 * takes the first following sibling carrying the `sortable` class and
 * returns the <tr> elements inside it.
 *
 * @param {Object} doc - Object exposing a jQuery-style `find` (presumably the
 *   jquerified page handed over by the spider).
 * @returns {Object} Whatever the final `find('tr')` call yields.
 */
function findRows (doc) {
  var headingSelector = 'h2:contains("Bundestagswahl 2013"), h3:contains("Bundestagswahl 2013")'
  var headings = doc.find(headingSelector)
  var resultTable = headings.nextAll('.sortable').first()
  return resultTable.find('tr')
}
/**
 * Turns the rows of one constituency page into an array of records.
 *
 * The constituency id is the text of the cell that follows the infobox <td>
 * containing "Wahlkreisnummer"; the constituency name is the text of
 * `h1.firstHeading`. Both are passed to parseRow for every row.
 *
 * @param {Object} rows - Collection with a jQuery-style `map(index, element)`.
 * @param {Object} doc - Page object exposing a jQuery-style `find`.
 * @returns {Array} Result of `rows.map(...).toArray()`.
 */
function parseRows (rows, doc) {
  var idCell = doc.find('.infobox td:contains("Wahlkreisnummer")').next()
  var constituencyId = idCell.text()
  var heading = doc.find('h1.firstHeading')
  var constituencyName = heading.text()

  // jQuery-style map callback: receives (index, element)
  function toRecord (index, row) {
    return parseRow(row, constituencyName, constituencyId)
  }

  return rows.map(toRecord).toArray()
}
/**
 * Builds one record from a single row of a results table.
 *
 * Cells are read by position: 0 = candidate, 1 = party, 2 = first vote,
 * 3 = second vote. Link hrefs found in the first two cells are prefixed with
 * the file-level `baseUrl`.
 *
 * @param {Element} tr - Table row to read.
 * @param {string} constituencyName - Copied unchanged into the record.
 * @param {string} constituencyId - Copied unchanged into the record.
 * @returns {Object|undefined} The record, or undefined when the row has no
 *   <td> cells (presumably header rows made of <th>). Fields whose cell or
 *   link is missing are undefined.
 */
function parseRow (tr, constituencyName, constituencyId) {
  var td = $(tr).find('td')
  if (td.length === 0) {
    return undefined
  }

  // A row can hold fewer than four cells; reading innerText straight off a
  // missing cell would throw and abort processing of the whole page.
  function cellText (index) {
    var cell = td[index]
    return cell ? cell.innerText : undefined
  }

  var candidatePath = $(td[0]).find('a').attr('href')
  var partyPath = $(td[1]).find('a').attr('href')

  return {
    constituency_id: constituencyId,
    constituency_name: constituencyName,
    candidate_name: cellText(0),
    // `&&` keeps the URL undefined when the cell has no link
    candidate_url: candidatePath && baseUrl + candidatePath,
    party: cellText(1),
    party_url: partyPath && baseUrl + partyPath,
    vote_1: cellText(2),
    vote_2: cellText(3)
  }
}
// Crawl every collected page, gather its result rows into `result`, and
// export everything as CSV once the spider reports completion.
artoo.ajaxSpider(pages, {
  // process() calls doc.find, so the page is requested in jQuery form
  jquerify: true,
  // NOTE(review): presumably the delay between requests in ms — confirm in artoo docs
  throttle: 3000,
  process: function collectPage (doc) {
    var tableRows = findRows(doc)
    var records = parseRows(tableRows, doc)
    result = result.concat(records)
    artoo.log.debug('parsing...', records)
  },
  done: function exportCsv () {
    artoo.saveCsv(result)
  }
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment