2

This is what I currently have:

// CasperJS script: visit a list of event-coverage pages, scrape the deck lists
// on each one, and write the collected text to CARD_DATA/allChampionDecks.txt
// under the current working directory.
var casper = require('casper').create();
var fs = require('fs');
var folderName = 'CARD_DATA';
var fileName = 'allChampionDecks.txt';
// Output path: <working directory>/CARD_DATA/allChampionDecks.txt
var save = fs.pathJoin(fs.workingDirectory, folderName, fileName);

// init jquery
// NOTE(review): this redeclares `casper`, replacing the instance created above
// with one that lists jquery.min.js as a client script.
var casper = require('casper').create({ clientScripts: ['jquery.min.js'] });

// NOTE(review): URL is not defined anywhere in this snippet.
casper.start(URL, function() { });

var links = ["http://magic.wizards.com/en/events/coverage/mtgochamp14","http://magic.wizards.com/node/335986","http://magic.wizards.com/en/events/coverage/2014WC"];
// Global counter used by the each() callback below to index into `links`.
var i = -1;
// Accumulates the string returned by getLinkDeckData for every visited link.
var linkData = '';

// iterate
casper.then(function() {
    this.each(links, function() {
        i++;
        // thenOpen only schedules a step; its callback runs later.
        this.thenOpen((links[i]), function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    });
    // NOTE(review): this write is executed as part of the current step, i.e.
    // before the thenOpen steps scheduled by the loop above have run, so
    // linkData is still '' at this point.
    fs.write(save, linkData + '\n', 'w');
});

// scrape
/**
 * Builds a JSON-like string describing every deck found in the current
 * document. Passed to casper's evaluate(); relies on `$` being available.
 *
 * All decks are appended into ONE object literal: the keys "event",
 * "deckName", "deck" and "sideboard" are repeated once per deck. Values are
 * concatenated without any escaping. When no '.deck-meta h4' element matches,
 * the result is just '{'.
 *
 * @returns {string} the concatenated deck description.
 */
function getLinkDeckData() {
    var meta = $('.deck-meta h4');
    var event = $('.deck-meta h5');
    var allDecks = $('.toggle-text .deck-list-text');
    var json = '{';
    // One iteration per deck heading; event[i] and allDecks[i] are read at the
    // same index as meta[i].
    for(var i = 0; i < meta.length; i++) {
        json += '"event": "'+$(event[i]).text().trim()+'",' +'"deckName": "'+$(meta[i]).text()+'",' +'"deck": [';
        // Main deck rows: count and name are paired by index.
        var cardCount = $(allDecks[i]).find('.sorted-by-overview-container .row .card-count');
        var cardName = $(allDecks[i]).find('.sorted-by-overview-container .row .card-name');
        for(var j = 0; j < cardCount.length; j++) {
            // Every entry except the last is followed by a comma.
            if(j < cardCount.length-1) json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"},';
            else json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"}';
        }
        json += '],' +'"sideboard": [';
        // Sideboard rows, same shape as the main deck rows above.
        var cardCount = $(allDecks[i]).find('.sorted-by-sideboard-container .row .card-count');
        var cardName = $(allDecks[i]).find('.sorted-by-sideboard-container .row .card-name');
        for(var j = 0; j < cardCount.length; j++) {
            if(j < cardCount.length-1) json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"},';
            else json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"}';
        }
        // Close the sideboard array; after the last deck also close the object.
        if(i < meta.length-1)
            json += '],'
        else
            json += ']}'
        /**/
    }
    return json;
}

casper.run();

I'm trying to iterate through some links to scrape some data. I'm not getting any errors but linkData is empty and nothing is written to the file.

For a single page I used the following and it works fine:

// Single-page variant: scrape the start page inside the start callback and
// write the result (plus a trailing newline) to the output file right away.
casper.start(URL, function scrapeStartPage() {
    fs.write(save, this.evaluate(getLinkDeckData) + '\n', 'w');
});
0

1 Answer 1

2

All then* (and wait*) functions are asynchronous step functions: calling one only schedules a step, which runs later. When you call casper.thenOpen() inside a loop, each call schedules a step that opens the page, and its callback is scheduled as a separate step right after it.

The problem is that when you try to write linkData, it is not in a separate step. Simply wrap it in casper.then() and it will work.

Fixed snippet:

// Schedule one open-and-scrape step per link, followed by a final step that
// writes the accumulated data once every page has been visited.
casper.then(function() {
    // forEach takes the callback as its first argument; the current link is
    // passed in directly, so no global counter is needed.
    links.forEach(function(link) {
        // Use `casper` explicitly: inside a plain forEach callback `this` is
        // not the casper instance.
        casper.thenOpen(link, function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    });
    // Steps run in the order they were scheduled, so this one runs only after
    // all thenOpen steps above have filled linkData.
    casper.then(function() {
        fs.write(save, linkData + '\n', 'w');
    });
});

Instead of using CasperJS' each, you should use Array.prototype.forEach. That way, you don't need a global counter variable.

Sign up to request clarification or add additional context in comments.

1 Comment

I have an issue with looping: then() and thenEvaluate() don't work inside the loop: stackoverflow.com/q/44176889/190929

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.