493: Undecipherable

-Blog-

-Projects-

-About me-

-RSS-

Downloading data from a webpage with login

Dennis Guse

Usually, downloading data from a webpage is straightforward: just pick your favorite tool (such as wget or curl) and go. Sometimes, however, it is not that simple - especially if the webpage requires a login (that is, the credentials cannot simply be sent as part of the URL).

The following code logs in and then downloads a page from such a website. It uses PhantomJS.

// Single PhantomJS page object shared by the login step and the data download.
var page = require('webpage').create();

// Hook invoked for received resources. It is intentionally a no-op;
// uncomment the line inside to dump every response while debugging.
page.onResourceReceived = function(response) {
//  console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + JSON.stringify(response));
};

// Step 1: open the login page, fill in the form and submit it.
page.open('https://LOGIN-URL', function(loginPageStatus) {
  // Guard clause: without the login page there is nothing to submit.
  if (loginPageStatus !== 'success') {
    console.log('Unable to access network');
    phantom.exit(-1);
    return;
  }

  console.log(page.url);

  // Passed to page.evaluate below, so it must not reference variables
  // from this script. It sets the credentials first and only then
  // clicks the login button.
  var submitLoginForm = function() {
    document.getElementById('USERNAME').value = "USERNAME";
    document.getElementById('PASSWORD').value = "PASSWORD";
    document.getElementById('Login').click();
  };

  // Register the follow-up handler BEFORE triggering the login request,
  // so the page load caused by the click is handed to downloadData.
  page.onLoadFinished = downloadData;
  page.evaluate(submitLoginForm);
});

/**
 * Follow-up handler for the login request: once the page load triggered by
 * the login click has finished, fetch the protected data page and print it.
 *
 * Unlike a plain "open and print", both load results are checked, so a failed
 * login request or a failed data request ends the script with a non-zero exit
 * code instead of printing whatever content is there and reporting success.
 *
 * @param {string} status - Load result passed to the onLoadFinished handler;
 *     compared against 'success', the same check the login page.open uses.
 */
function downloadData(status) {
  // Diagnostic output: result of the login request and the page it produced.
  console.log(status);
  console.log(page.content);

  // One-shot handler: unregister so the load of the data page below
  // does not call this function again.
  page.onLoadFinished = undefined;

  if (status !== 'success') {
    console.log('Login request failed');
    phantom.exit(-1);
    return;
  }

  //Download data
  page.open('https://DATA-URL', function(dataStatus) {
    if (dataStatus !== 'success') {
      console.log('Unable to download data');
      phantom.exit(-1);
      return;
    }

    console.log(page.content);
    phantom.exit(0);
  });
}