Skip to content

Instantly share code, notes, and snippets.

@karmadude
Created December 23, 2011 22:36
Show Gist options
  • Save karmadude/1515567 to your computer and use it in GitHub Desktop.
Save karmadude/1515567 to your computer and use it in GitHub Desktop.
Web Scraping with Node
// https://github.com/tmpvar/jsdom
// npm install jsdom
var jsdom = require('jsdom');
/**
 * Scrapes one page of Dribbble shots and stores them in `dribbbles[page - 1]`.
 *
 * The page is loaded through the legacy callback-style `jsdom.env` API with
 * jQuery injected from `jqueryURL`. Once `maxPages` callbacks have completed
 * (successfully or not), the total number of scraped shots and the JSON dump
 * of every page are written to the console.
 *
 * Relies on the module-level `jsdom`, `dribbbleURL`, `jqueryURL`, `dribbbles`,
 * `maxPages` and `pagesProcessed` bindings.
 *
 * @param {string} url  URL used for the first page (page === 1).
 * @param {number} page 1-based page number; pages > 1 are fetched from
 *                      `dribbbleURL + '/shots?page=' + page`.
 */
function scrapeDribbble (url, page) {
  // Do not reassign the caller's parameter; derive the effective URL instead.
  var pageURL = page > 1 ? dribbbleURL + '/shots?page=' + page : url;
  dribbbles[page - 1] = [];

  jsdom.env(pageURL, [jqueryURL], function (errors, window) {
    if (errors) {
      console.error('Dribbble page ' + page + ' (' + pageURL + ') reported errors:', errors);
    }

    // A failed load may hand us no window (or one without jQuery); skip the
    // page instead of throwing so the completion counter below still advances.
    if (window && typeof window.$ === 'function') {
      var $ = window.$;
      $('.dribbbles > li').each(function () {
        dribbbles[page - 1].push(parseDribbble($(this)));
      });
      // Release the document; otherwise every scraped page stays in memory.
      if (typeof window.close === 'function') {
        window.close();
      }
    }

    pagesProcessed++;
    if (pagesProcessed === maxPages) {
      // `dribbbles` holds one array per page, so count the shots inside them
      // rather than reporting the number of pages.
      var total = dribbbles.reduce(function (sum, pageShots) {
        return sum + pageShots.length;
      }, 0);
      console.log("Dribbbles Scraped: ", total);
      console.log("Dribbbles JSON: ", JSON.stringify(dribbbles));
    }
  });
}

/**
 * Builds a plain shot record from the jQuery-wrapped `<li>` of one shot.
 *
 * Missing `id` or `.views` markup yields `''` / `0` instead of throwing,
 * because `.attr()` / `.html()` return no string for an empty selection.
 *
 * @param {Object} $item jQuery object wrapping a single `.dribbbles > li`.
 * @returns {Object} Record with id, title, comment, date, url, img and stats.
 */
function parseDribbble ($item) {
  var viewsHtml = $item.find('.views').html();
  return {
    id: ($item.attr('id') || '').replace('screenshot-', ''),
    title: $item.find('.dribbble-over strong').html(),
    comment: $item.find('.dribbble-over .comment').html(),
    date: $item.find('.dribbble-over em').html(),
    url: dribbbleURL + $item.find('.dribbble-link').attr('href'),
    img: dribbbleURL + $item.find('.dribbble-link img').attr('src'),
    stats: {
      views: (typeof viewsHtml === 'string' ? viewsHtml.trim() : '') || 0,
      comments: $item.find('.cmnt a').html() || 0,
      favs: $item.find('.fav a').html() || 0
    }
  };
}
// Script entry point: announce the run, set up the state shared with
// scrapeDribbble, then request every page.
console.log('Dribbble Scraper');

// Endpoints: the site being scraped and the jQuery build injected into each page.
var dribbbleURL = 'http://dribbble.com';
var jqueryURL = 'http://code.jquery.com/jquery-1.7.1.min.js';

// Shared state: one array of shots per page, and the count of finished pages
// that scrapeDribbble compares against maxPages.
var dribbbles = [];
var maxPages = 10;
var pagesProcessed = 0;

// Issue the requests for pages 1..maxPages in ascending order.
var page = 1;
while (page <= maxPages) {
  scrapeDribbble(dribbbleURL, page);
  page++;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment