What you need (on Ubuntu Linux):
- Install node.js on ubuntu: http://ec2-50-16-39-169.compute-1.amazonaws.com/blog/?p=1688
- Download jQuery (you could also reference an online copy - Google is your friend): I use jquery-1.4.2.min.js (1.4.3 did not work for me)
// helloJQueryNode.js - smallest possible jsdom + jQuery example:
// build an empty DOM window, attach jQuery to it, append an element
// and print that element's text to stdout.
var util = require("util"),
    jsdom = require("jsdom"),
    window = jsdom.jsdom().createWindow();

// The callback uses window.jQuery, so it relies on jQueryify having
// attached jQuery (from the local file path) to the window first.
jsdom.jQueryify(window, "/home/woidda/libs/js/jquery-1.4.2.min.js", function () {
    window.jQuery('body').append("<div class='testing'>Hello World, It works! Really!</div>");
    // Read the text back through a selector to show the DOM was really modified.
    util.print(window.jQuery(".testing").text());
});
Save the script as helloJQueryNode.js and type into a terminal:
> node helloJQueryNode.js
It should write to the console:
> Hello World, It works! Really!
Now a more complex example:
// xmlTest.js - fetch an XML plant catalog over HTTP and print the sum of
// all <PRICE> values, using jQuery selectors (via jsdom) to read the XML.
var fs = require('fs'), util = require('util'),
    jsdom = require('jsdom'), http = require('http');

var jQueryHome = '/home/woidda/libs/js/jquery-1.4.2.min.js';
var xmlFileUrl = 'http://www.w3schools.com/xml/plant_catalog.xml';
var host = 'www.w3schools.com';

/**
 * Parses the given XML text with jQuery inside a fresh jsdom window and
 * logs the sum of all PRICE element values to the console.
 *
 * @param {string} xml - the complete XML document as text
 */
function printPriceSum(xml) {
    var window = jsdom.jsdom().createWindow();
    jsdom.jQueryify(window, jQueryHome, function () {
        var sumOfPrices = 0;
        window.jQuery(xml).find('PRICE').each(function () {
            var priceString = window.jQuery(this).text();
            // Skip the first character before parsing (presumably a
            // currency symbol such as "$" - verify against the XML file).
            var price = parseFloat(priceString.substr(1, priceString.length));
            sumOfPrices += price;
        });
        console.log(sumOfPrices);
    });
}

var client = http.createClient(80, host);
var request = client.request('GET', xmlFileUrl,
    {'host': host});

request.on('response', function (response) {
    response.setEncoding('utf8');
    // The body arrives in chunks; collect them all before parsing.
    var body = '';
    response.on('data', function (chunk) {
        body += chunk;
    });
    response.on('end', function () {
        printPriceSum(body);
    });
});
request.end();

// file based version - store the xml file in the folder with the script or alter path accordingly
/*
fs.readFile('plant_catalog.xml', 'UTF-8', function (err, chunk) {
    if (err) throw err;
    printPriceSum(chunk);
});
*/
Again, save the script (as xmlTest.js) and type into a terminal:
> node xmlTest.js
It should write to the console:
> 229.29
Pretty simple! So what does it do? You have an XML file: Node.js fetches it and jQuery is used for parsing. Using jQuery selector syntax to extract the data you are looking for from an XML file is typically a lot easier than, for example, using XPath.
Okay, as already mentioned, for big XML files, or if you have lots of files to crawl, *I* would more likely use Java (Apache HttpClient + a fast StAX parser such as Woodstox). For very simple tasks, Node.js + jQuery is a really good choice.
Some more about Node.js:
http://net.tutsplus.com/tutorials/javascript-ajax/learning-serverside-javascript-with-node-js/