I'm trying to make a crawler for SEO purposes, and I can't seem to get PhantomJS to at least download this particular page: https://tablet.euroslots.com/home/
If I use cURL it works fine (but obviously doesn't process the javascript):
✓ 1344:0 /cherrytech/js-crawler root› curl https://tablet.euroslots.com/home/ <!doctype html><!--[if lt IE 7]><html class="no-js lt-ie9 lt-ie8 lt-ie7"> ... My PhantomJS script:
var page = require('webpage').create(); page.onResourceRequested = function (request) { console.log('Request ' + JSON.stringify(request, undefined, 4)); }; page.onResourceReceived = function(response) { console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + JSON.stringify(response)); }; page.onResourceError = function(resourceError) { console.log('Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')'); console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString); }; page.settings.userAgent = 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A406 Safari/8536.25'; page.open('https://tablet.euroslots.com/home/', function() { console.log(page.content); phantom.exit(); }); And this is the result of running it:
✓ 1347:0 /cherrytech/js-crawler root› phantomjs crawler.js Request { "headers": [ { "name": "User-Agent", "value": "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A406 Safari/8536.25" }, { "name": "Accept", "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" } ], "id": 1, "method": "GET", "time": "2014-09-16T16:02:24.426Z", "url": "https://tablet.euroslots.com/home/" } Unable to load resource (#1URL:https://tablet.euroslots.com/home/) Error code: 2. Description: Connection closed Response (#1, stage "end"): {"contentType":null,"headers":[],"id":1,"redirectURL":null,"stage":"end","status":null,"statusText":null,"time":"2014-09-16T16:02:24.763Z","url":"https://tablet.euroslots.com/home/"} <html><head></head><body></body></html>
Look at the output of your `onResourceError` handler: you will see that the connection is closed. I don't know why. Passing `--web-security=false` and `--ignore-ssl-errors=true` does not help. You can also try SlimerJS instead of PhantomJS. Maybe it is a PhantomJS limitation.