Downloading data from a webpage with login
Dennis Guse
Usually, downloading data from a webpage is straightforward: just use your favorite tool (such as wget or curl) and go. Sometimes, however, it is not that simple - especially if the webpage requires a login through a form (i.e., the credentials cannot simply be sent as part of the URL).
The following code logs in and then downloads a page from such a website. It uses PhantomJS.
// PhantomJS script, part 1: open the login page, fill in the login form and
// submit it. The load triggered by the submit is handed over to downloadData.
var page = require('webpage').create();

// Set to true to dump every received resource while debugging.
var logResources = false;

page.onResourceReceived = function(response) {
    if (logResources) {
        console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + JSON.stringify(response));
    }
};

//Start process: Login
page.open('https://LOGIN-URL', function(status) {
    if (status !== 'success') {
        console.log('Unable to access network');
        phantom.exit(-1);
        return;
    }

    console.log(page.url);

    //Set handler for the follow-up of the login request.
    page.onLoadFinished = downloadData;

    // The function below runs inside the web page, not in this script, so it
    // cannot see variables from here. It returns false when the login form is
    // not present instead of throwing on a null element.
    var formFound = page.evaluate(function() {
        var usernameField = document.getElementById('USERNAME');
        var passwordField = document.getElementById('PASSWORD');
        var loginButton = document.getElementById('Login');
        if (!usernameField || !passwordField || !loginButton) {
            return false;
        }

        //Set credentials
        usernameField.value = "USERNAME";
        passwordField.value = "PASSWORD";

        //Trigger login request
        loginButton.click();
        return true;
    });

    // Without the form no login request is sent, so no further load would
    // finish and the script would never reach phantom.exit().
    if (formFound === false) {
        console.log('Login form elements not found on ' + page.url);
        page.onLoadFinished = undefined;
        phantom.exit(-1);
    }
});
/**
 * Load-finished handler for the page load that follows the login request.
 * Prints the post-login page, then opens the data URL, prints its content
 * and terminates PhantomJS.
 *
 * Exits with code 0 when the data page loaded, and with -1 when either the
 * login follow-up or the data page failed to load.
 * NOTE(review): a 'success' status only means the page loaded; it does not
 * prove that the credentials were accepted.
 *
 * @param {string} status - load status passed by PhantomJS ('success' or 'fail').
 */
function downloadData(status) {
    // One-shot handler: detach it so the load started below does not re-enter.
    page.onLoadFinished = undefined;

    console.log(status);
    if (status !== 'success') {
        console.log('Login request failed to load');
        phantom.exit(-1);
        // Return explicitly so nothing below runs after requesting the exit.
        return;
    }
    console.log(page.content);

    //Download data
    page.open('https://DATA-URL', function(dataStatus) {
        if (dataStatus !== 'success') {
            console.log('Unable to download data');
            phantom.exit(-1);
            return;
        }
        console.log(page.content);
        phantom.exit(0);
    });
}