Skip to content

Instantly share code, notes, and snippets.

@kielni
Created February 15, 2015 05:20
Show Gist options
  • Save kielni/0c6d921d68cfcef85068 to your computer and use it in GitHub Desktop.
Save kielni/0c6d921d68cfcef85068 to your computer and use it in GitHub Desktop.
Extract title and description of City of San Jose camp from URL and append to a text file
var request = require("request"),
cheerio = require("cheerio"),
_ = require("lodash"),
fs = require("fs"),
readline = require("readline");
// Interactive scraper: reads one camp URL per line from stdin, fetches the
// page, pulls out the title and description, and appends them to OUTPUT_FILE.
var rl = readline.createInterface(process.stdin, process.stdout);

// File that every scraped camp entry is appended to.
var OUTPUT_FILE = "san_jose.txt";
// A paragraph must be longer than this many characters to count as the description.
var MIN_DESC_LENGTH = 80;

// Number of URLs whose fetch/append cycle has not finished yet.
var pending = 0;
// Set once stdin has been closed; no prompt may be shown after that.
var closed = false;

/**
 * Pick the camp description out of a loaded page.
 *
 * @param {Function} $ - cheerio instance loaded with the page HTML
 * @returns {string} the description text, or "" when none could be found
 */
function extractDescription($) {
    var p = [];
    $(".an-overlay-leftinfo p").each(function(idx, element) {
        p[idx] = $(element).text().trim();
    });
    // description is in 5th 0-indexed p; if not there, the 2nd
    var descIdx = _.find([4, 1], function(idx) {
        // pages with fewer paragraphs leave p[idx] undefined
        return (p[idx] || "").length > MIN_DESC_LENGTH;
    });
    var desc = descIdx === undefined ? "" : p[descIdx];
    // unless it's somewhere else
    if (!desc) {
        desc = $(".overlay-description-text").text().trim();
    }
    return desc;
}

/**
 * Mark one URL as finished: show the next prompt, or exit if input has
 * already been closed and this was the last outstanding URL.
 */
function finishUrl() {
    pending -= 1;
    if (closed) {
        if (pending === 0) {
            process.exit(0);
        }
        return;
    }
    rl.prompt();
}

rl.setPrompt('url> ');
rl.prompt();
rl.on('line', function(line) {
    var url = line.trim();
    // A blank line is not a URL; just ask again.
    if (!url) {
        rl.prompt();
        return;
    }
    pending += 1;
    request(url, function(err, response, body) {
        // Report a failed fetch and keep the session alive instead of
        // throwing from inside the callback, which would kill the process.
        if (err) {
            console.error("could not fetch " + url + ": " + err.message);
            finishUrl();
            return;
        }
        var $ = cheerio.load(body);
        var title = $('title').text().trim();
        var desc = extractDescription($);
        var camp = "\n" + url + "\n" + title + "\n" + desc + "\n";
        fs.appendFile(OUTPUT_FILE, camp, function(appendErr) {
            if (appendErr) {
                console.error("could not write to " + OUTPUT_FILE + ": " + appendErr.message);
            } else {
                console.log("camp: " + title + "\n---\n" + desc + "\n\n");
            }
            // The prompt is shown only now so it follows the result output.
            finishUrl();
        });
    });
}).on('close', function() {
    closed = true;
    // Let in-flight fetches/appends finish (e.g. URLs piped on stdin);
    // finishUrl() exits once the last one completes.
    if (pending === 0) {
        process.exit(0);
    }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment