0

I'm having some serious problems with a fairly simple CasperJS scraper that I'm trying to write. Essentially, I want to traverse some search results on a website, following each result, collecting some data, then returning to the current search page. Once this process has been completed, I want to write the results to file. I have the following code that is not working at all. Please excuse any glaringly obvious mistakes, I am quite new to javascript, being a Java, Ruby, C++ native.

// This site can also be queried via URL, I initially wrote this serializer // to use this approach, but I ended up going with CasperJS navigation instead. // My problems seem agnostic to whether or not I navigate using page links or URL. function serialize(json) { var str = []; for(var prop in json) { if(json.hasOwnProperty(prop)) { str.push(encodeURIComponent(prop) + "=" + encodeURIComponent(obj[prop])); } } return str.join("&"); } // Scrape Links and Names from the current page in the searh results function getPageLinks() { var dancers = document.querySelectorAll('h4 > a'); return Array.prototype.map.call(links, function(e) { var result = {}; result[e.textContent] = e.getAttribute('href'); return result; }); } // For a given dancer, scrape the block of html containing the name of each donor, // their donation amount, and any comments. function scrapeDonorInfo() { var donors = document.querySelectorAll('div.msgBottomInnCont > div.meta'); return Array.prototype.map.call(links, function(e) { return e.innerHtml; }); } // Use Tail recursion to scrape the donors for every dancer in each page of the search results. function scrapeAllDonors(dancers, startIndex) { // Inject Underscore.js for utility methods (namely _.union()) this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js'); // Populate the links object only after there are links to scrape casper.waitForSelector('h4 > a', function() { var links = this.evaluate(getPageLinks); dancers = this.evaluate(_.union(dancers, links)); }); this.echo('Links object populated', 'INFO'); // Log the message, // using this.echo() for colored tags // For every dancer page link on this page of search results, // fetch their fundraising page, scrape their donors, // dancers.forEach(function(element, index, array) { if(index >= startIndex) { var name = Object.keys(element)[0]; var link = baseURL + element[name]; casper.thenOpen(link); casper.waitForSelector('div.meta', function() { var viewMore = 'a.viewMore'; if(casper.visible(viewMoreActivity)) { casper.thenClick(viewMore); } element[name] = {"donor_info": this.evaluate(getDonorInfo)}; }); casper.back(); } }); var nextLink = "a#next"; casper.waitForSelector(nextLink, function() { // If the next button in the results is clickable, click it. if (casper.visible(nextLink)) { casper.thenClick(nextLink); casper.thenEvaluate(scrapeAllDonors(dancers, dancers.length())); } else { // Otherwise, write the final results to file. fs.write(save, dancers, 'w'); casper.echo("END") } }); } // Note: This is the Phantom.js package 'fs', not the Node.js package. var fs = require('fs'); // Create a dated file for scrape results var fname = new Date().getTime() + '.txt'; var save = fs.pathJoin(fs.workingDirectory, 'data', fname); // Initialize Casper.js with desired settings var casper = require('casper').create({ verbose: true, logLevel: 'debug', pageSettings: { loadImages: false, loadPlugins: false } }); // Handler for Resource Errors casper.on("resource.error", function(resourceError) { console.log('Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')'); console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString); }); // Handler for Page Errors casper.on("page.error", function (msg, trace) { console.log( 'Error: ' + msg, 'ERROR' ); console.log( 'Trace: ' + trace, 'TRACE' ); }); // Handler for Blocking requests made by social components (facebook in particular) casper.on("resource.requested", function(requestData, networkRequest){ console.log('Request (#' + requestData.id + '): ' + JSON.stringify(requestData) + "\n"); if (requestData.url.indexOf("facebook") !== -1) { networkRequest.abort(); } }); // BaseURL for the site, convenient for scrapeAllDonors var baseURL = 'https://fundraise.nudm.org/'; casper.start('https://fundraise.nudm.org/search/fundraisers?page=1'); casper.then(scrapeAllDonors([], 0)); // Run everything in the stack, then notify and exit casper.run(function() { this.echo("DONE", 'INFO'); this.exit(); }); 

To make the problem worse, Casper/Phantom refuses to print any of my log messages and I can't figure out why. When I run without debug, I get:

casperjs --ssl-protocol=tlsv1 Crawler.js [info] [phantom] Starting... Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match. 

With debug enabled, I get:

 casperjs --ssl-protocol=tlsv1 -debug=true Crawler.js Unable to open file: -debug=true Unable to load script -debug=true; check file syntax dhcp-199-74-85-154:NUDM Expose williambyrne$ casperjs --ssl-protocol=tlsv1 --debug=true Crawler.js 2016-03-06T14:22:31 [DEBUG] CookieJar - Created but will not store cookies (use option '--cookies-file=<filename>' to enable persisten cookie storage) 2016-03-06T14:22:31 [DEBUG] Phantom - execute: Configuration 2016-03-06T14:22:31 [DEBUG] 0 objectName : "" 2016-03-06T14:22:31 [DEBUG] 1 cookiesFile : "" 2016-03-06T14:22:31 [DEBUG] 2 diskCacheEnabled : "false" 2016-03-06T14:22:31 [DEBUG] 3 maxDiskCacheSize : "-1" 2016-03-06T14:22:31 [DEBUG] 4 ignoreSslErrors : "false" 2016-03-06T14:22:31 [DEBUG] 5 localToRemoteUrlAccessEnabled : "false" 2016-03-06T14:22:31 [DEBUG] 6 outputEncoding : "UTF-8" 2016-03-06T14:22:31 [DEBUG] 7 proxyType : "http" 2016-03-06T14:22:31 [DEBUG] 8 proxy : ":1080" 2016-03-06T14:22:31 [DEBUG] 9 proxyAuth : ":" 2016-03-06T14:22:31 [DEBUG] 10 scriptEncoding : "UTF-8" 2016-03-06T14:22:31 [DEBUG] 11 webSecurityEnabled : "true" 2016-03-06T14:22:31 [DEBUG] 12 offlineStoragePath : "" 2016-03-06T14:22:31 [DEBUG] 13 offlineStorageDefaultQuota : "-1" 2016-03-06T14:22:31 [DEBUG] 14 printDebugMessages : "true" 2016-03-06T14:22:31 [DEBUG] 15 javascriptCanOpenWindows : "true" 2016-03-06T14:22:31 [DEBUG] 16 javascriptCanCloseWindows : "true" 2016-03-06T14:22:31 [DEBUG] 17 sslProtocol : "tlsv1" 2016-03-06T14:22:31 [DEBUG] 18 sslCertificatesPath : "" 2016-03-06T14:22:31 [DEBUG] 19 webdriver : ":" 2016-03-06T14:22:31 [DEBUG] 20 webdriverLogFile : "" 2016-03-06T14:22:31 [DEBUG] 21 webdriverLogLevel : "INFO" 2016-03-06T14:22:31 [DEBUG] 22 webdriverSeleniumGridHub : "" 2016-03-06T14:22:31 [DEBUG] Phantom - execute: Script & Arguments 2016-03-06T14:22:31 [DEBUG] script: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js" 2016-03-06T14:22:31 [DEBUG] 0 arg: "--casper-path=/usr/local/Cellar/casperjs/1.1-beta4/libexec" 2016-03-06T14:22:31 [DEBUG] 1 arg: "--cli" 2016-03-06T14:22:31 [DEBUG] 2 arg: "Crawler.js" 2016-03-06T14:22:31 [DEBUG] Phantom - execute: Starting normal mode 2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/package.json" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/coffee-script.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./lexer.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././rewriter.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././helpers.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./parser.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./helpers.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./nodes.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././scope.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./././helpers.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././lexer.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./././rewriter.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/package.json" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/cli.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/utils.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] Phantom - injectJs: "Crawler.js" 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/casper.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/colorizer.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/events.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/http.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/mouse.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/pagestack.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/querystring.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/tester.js" QMap(("mode", QVariant(QString, "r") ) ) [info] [phantom] Starting... 2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getJsConfirmCallback 2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getGenericCallback 2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getJsConfirmCallback 2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 10 2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) ) 2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 100 Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match. 2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 10 2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 100 

Any ideas?

Update (after making the first of the suggested changes)

williambyrne$ casperjs --ssl-protocol=tlsv1 Crawler.js [info] [phantom] Starting... [info] [phantom] Running suite: 3 steps [debug] [phantom] opening url: https://fundraise.nudm.org/search/fundraisers?page=1, HTTP GET [debug] [phantom] Navigation requested: url=https://fundraise.nudm.org/search/fundraisers?page=1, type=Other, willNavigate=true, isMainFrame=true Request (#1): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}],"id":1,"method":"GET","time":"2016-03-06T21:03:49.874Z","url":"https://fundraise.nudm.org/search/fundraisers?page=1"} [debug] [phantom] url changed to "https://fundraise.nudm.org/search/fundraisers?page=1" Request (#2): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":2,"method":"GET","time":"2016-03-06T21:03:51.112Z","url":"https://fundraise.nudm.org/css/sc_global.css?cuiv=1456860159443"} Request (#3): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":3,"method":"GET","time":"2016-03-06T21:03:51.113Z","url":"https://fundraise.nudm.org/stylesheets/css/charity/search.css?cuiv=1456860159443"} Request (#4): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":4,"method":"GET","time":"2016-03-06T21:03:51.113Z","url":"https://fundraise.nudm.org/css/white_label_header_v3.4.3.1.css?cuiv=1456860159443"} Request (#5): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":5,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://fundraise.nudm.org/css/white_label_header_responsive.css?cuiv=1456860159443"} Request (#6): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":6,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js"} Request (#7): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":7,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://fundraise.nudm.org/js/front_scripts.js?cuiv=1456860159443"} Request (#8): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":8,"method":"GET","time":"2016-03-06T21:03:51.115Z","url":"https://fundraise.nudm.org/js/mobile_share.js?cuiv=1456860159443"} Request (#9): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":9,"method":"GET","time":"2016-03-06T21:03:51.115Z","url":"https://fundraise.nudm.org/js/search.js?cuiv=1456860159443"} Request (#10): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":10,"method":"GET","time":"2016-03-06T21:03:51.116Z","url":"https://fundraise.nudm.org/js/mobile.js?cuiv=1456860159443"} Request (#11): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":11,"method":"GET","time":"2016-03-06T21:03:51.304Z","url":"https://ssl.google-analytics.com/ga.js"} Request (#12): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":12,"method":"GET","time":"2016-03-06T21:03:51.304Z","url":"https://www.google-analytics.com/analytics.js"} Request (#13): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":13,"method":"GET","time":"2016-03-06T21:03:51.309Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Reg-webfont.woff"} Request (#14): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":14,"method":"GET","time":"2016-03-06T21:03:51.313Z","url":"https://connect.facebook.com/en_US/sdk.js"} Request (#15): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":15,"method":"GET","time":"2016-03-06T21:03:51.314Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Sbold-webfont.woff"} Request (#16): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":16,"method":"GET","time":"2016-03-06T21:03:51.315Z","url":"https://fundraise.nudm.org/css/fonts/pictos/pictos-webfont.woff"} Request (#17): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":17,"method":"GET","time":"2016-03-06T21:03:51.315Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Bold-webfont.woff"} Request (#18): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":18,"method":"GET","time":"2016-03-06T21:03:51.316Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Thin-webfont.woff"} Request (#19): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":19,"method":"GET","time":"2016-03-06T21:03:51.317Z","url":"https://fundraise.nudm.org/css/fonts/entypo/entypo.woff"} Unable to load resource (#14URL:) Error code: 301. Description: Protocol "" is unknown Request (#20): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":20,"method":"GET","time":"2016-03-06T21:03:51.796Z","url":"https://js-agent.newrelic.com/nr-885.min.js"} Request (#21): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":21,"method":"GET","time":"2016-03-06T21:03:53.756Z","url":"https://bam.nr-data.net/1/67fe2a1b26?a=10291124&v=885.a559836&to=ZV0HYUJUCEYEU0QLC1wXJFZEXAlbSlRVBAVHVBEaQ1AHRwZYHwQRXFwXVFlGA0cW&rst=2645&ap=775&fe=686&dc=204&f=%5B%5D&at=SRoEFwpOG0g%3D&jsonp=NREUM.setToken"} [debug] [phantom] Successfully injected Casper client-side utilities [debug] [phantom] start page is loaded [info] [phantom] Step anonymous 3/3 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200) Links object populated [info] [phantom] Step anonymous 3/3: done in 3944ms. [info] [phantom] Step _step 4/5 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200) [info] [phantom] Step _step 4/5: done in 3965ms. [info] [phantom] waitFor() finished in 40ms. [info] [phantom] Step anonymous 5/6 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200) Error: ReferenceError: Can't find variable: links Trace: [object Object],[object Object],[object Object] Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match. Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match. Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match. 

It seems that there is some problem with the scoping of the 'links' array.

Update 2: (Changes to scrapeAllDonors)

// Use Tail recursion to scrape the donors for every dancer in each page of the search results. function scrapeAllDonors(dancers, startIndex) { // Inject Underscore.js for utility methods (namely _.union()) this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js'); // Populate the links object only after there are links to scrape casper.waitForSelector('h4 > a', function() { var links = this.evaluate(getPageLinks); dancers = this.evaluate(_.union(dancers, links)); // For every dancer page link on this page of search results, // fetch their fundraising page, scrape their donors, // dancers.forEach(function(element, index, array) { if(index >= startIndex) { var name = Object.keys(element)[0]; var link = baseURL + element[name]; casper.thenOpen(link); casper.waitForSelector('div.meta', function(name) { var viewMore = 'a.viewMore'; if(casper.visible(viewMoreActivity)) { casper.thenClick(viewMore); } element[name] = {"donor_info": this.evaluate(getDonorInfo)}; }, name); casper.back(); } }); // If the next button in the results is clickable, click it. var nextLink = "a#next"; if (casper.visible(nextLink)) { casper.thenClick(nextLink); casper.then(function() { scrapeAllDonors.call(this, dancers, dancers.length()); }); } else { // Otherwise, write the final results to file. fs.write(save, dancers, 'w'); casper.echo("END") } }); this.echo('Donor Information Scraped', 'INFO'); // Log the message, // using this.echo() for colored tags } 

1 Answer 1

1

You've made the error of calling scrapeAllDonors immediately instead of passing it in for execution at a later time, here:

casper.thenEvaluate(scrapeAllDonors(dancers, dancers.length())); 

and here:

casper.then(scrapeAllDonors([], 0)); 

This means that it executes before even the first page is loaded and therefore tries to operate on about:blank. If you want to call it like that, you need to refactor scrapeAllDonors, so that it returns a step function:

function scrapeAllDonors(dancers, startIndex) { return function(){ // Inject Underscore.js for utility methods (namely _.union()) this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js'); // ... var nextLink = "a#next"; casper.waitForSelector(nextLink, function() { // ... }); }; } 

If you don't want to change scrapeAllDonors would be to refactor the calls to it by replacing

casper.then(scrapeAllDonors(...)); 

with

casper.then(function(){ scrapeAllDonors.call(this, ...) }); 

My answer on What must be wrapped in then() statements in CasperJS? How to determine execution order of sync/async functions? might be helpful for understanding the intricacies of the asynchronous execution in CasperJS.

Sign up to request clarification or add additional context in comments.

13 Comments

So if I'm understanding this correctly the problem is that scrapeAllDonors is called before the page I requested in casper.start is finished loading? So would a viable solution be to throw the casper.then(scrapeAllDonors([], 0)); in a casper.waitForUrl('https://fundraise.nudm.org/search/fundraisers?page=1', function() {}); ?
PS: I actually read that answer while I was working on this. Interesting, but I must not have read closely enough.
Got it, thanks so much. I'll give both of those a try.
Good point, incidentally, its not quite working. Check the update
Looking further on your code, it should be noted that wait* is asynchronous, so dancers.forEach will never iterate or will result in a TypeError. You need to move that block to the end of the casper.waitForSelector callback above it. Besides that, I haven't checked whether the logic is ok ;)
|

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.