CasperJS 翻到新页面后表格内容不更新

问题描述 投票:0回答:1

我正在尝试从以下网站中提取表格:

http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table

网址会变为第 2 页的地址,但无论等待多长时间,屏幕截图和保存的 .doc 文件中显示的始终是同一张表格(第 1 页的内容)。而在浏览器中打开该网站并点击第 2 页时,表格会自动更新。任何帮助将不胜感激。

以下是我的代码

// CasperJS script: opens the jet-tracker results page, saves a screenshot and
// the text of the results table, then clicks a pagination link and tries to
// save the same two artifacts again for the following page.
var casper = require('casper').create();
var fs = require('fs');
// NOTE(review): the URL below contains a run of spaces inside the `e=` date
// ("2011-01-     01"), unlike the URL compared against in waitFor() further
// down — looks like a paste artifact; confirm before relying on it.
casper.start('http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-     01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table', function() {

// Screenshot of the first page of results.
this.capture("crap0" + ".png");
// Collect the innerText of every node matched by the table XPath.
// Despite the name, firstRow is an array of whole-table texts, not one row.
var firstRow = this.evaluate(function () {
    var elements = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
    return [].map.call(elements, function(element) {
        return element.innerText;
    });

});

// Write the collected text to pook.doc in 'w' mode.
// NOTE(review): firstRow is an array here, not a string — confirm how
// fs.write() coerces it.
fs.write('pook.doc', firstRow, 'w');

});

casper.then(function() {
 // Click the third anchor under #results-pagination
 // (presumably the "page 2" link — confirm against the page markup).
this.click({
type: 'xpath',
  path: '//*[@id="results-pagination"]/div/a[3]'
});

 // var url ='http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table'
//this.open(url);

// Wait until the current URL equals the p=1 (second page) URL, then report
// success; report an error if the check never passes.
this.waitFor(function check() {
    return (this.getCurrentUrl() === 'http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table');
    },
function then() { // step to execute when check() is ok
    this.echo('Navigated to page 2', 'INFO');
},
function timeout() { // step to execute if check has failed
    this.echo('Failed to navigate to page 2', 'ERROR');
});
// NOTE(review): this capture, the evaluate() below and the final fs.write()
// are plain statements in the same step body as the waitFor()/wait() calls,
// not inside their callbacks. CasperJS's wait helpers appear to schedule
// later steps rather than block, so these statements likely run before the
// waits complete — which would explain the unchanged table. Confirm against
// the CasperJS documentation.
this.capture("crap" + ".png");

// Fixed 20-second pause; its callback only echoes a message.
this.wait(20000, function() {
    this.echo("I've waited for 20 seconds.");
});
// Same table-text extraction as for the first page.
var firstRow2 = this.evaluate(function () {
    var elements2 = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
    return [].map.call(elements2, function(element2) {
        return element2.innerText;
    });

});

// Write the second extraction to poop.doc in 'w' mode (array argument, as above).
fs.write('poop.doc', firstRow2, 'w');
});


// Start executing the queued steps.
casper.run();
dynamic web-scraping phantomjs casperjs
1个回答
0
投票

你已经很接近了!请记住,只有当页面内容是动态加载时,才必须使用 waitFor() 函数。而这里并不属于这种情况。

尝试:

// CasperJS script: saves a screenshot and the text of the results table for
// the first page of results, clicks a pagination link, then saves both again
// for the page shown after the click.
// Written in ES5 because the script runs on PhantomJS.
var casper = require('casper').create();
var fs = require('fs');

var START_URL = 'http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table';
var TABLE_XPATH = '//*[@id="table_results"]/table';
// Third anchor inside the pagination container.
// NOTE(review): presumably the "page 2" link — confirm against the page markup.
var NEXT_PAGE_XPATH = '//*[@id="results-pagination"]/div/a[3]';

/**
 * Takes a screenshot of the current page and writes the text of the results
 * table(s) to a file.
 *
 * @param {Object} page - The Casper instance (`this` inside a step).
 * @param {string} screenshotPath - File the screenshot is written to.
 * @param {string} outputPath - File the table text is written to.
 */
function saveResultsTable(page, screenshotPath, outputPath) {
    page.capture(screenshotPath);

    // The XPath is passed in as an argument because the evaluated function
    // cannot see variables of this script's scope.
    var tableTexts = page.evaluate(function (xpath) {
        var tables = __utils__.getElementsByXPath(xpath);
        return [].map.call(tables, function (table) {
            return table.innerText;
        });
    }, TABLE_XPATH);

    // Join explicitly so fs.write() always receives a string, and fall back
    // to an empty list if evaluate() yielded nothing.
    fs.write(outputPath, (tableTexts || []).join('\n'), 'w');
}

casper.start(START_URL, function () {
    saveResultsTable(this, 'crap0.png', 'pook.doc');
});

casper.then(function () {
    this.click({
        type: 'xpath',
        path: NEXT_PAGE_XPATH
    });

    // Queue the extraction as a follow-up step so it runs after the click
    // step has finished.
    // NOTE(review): no explicit wait is added here; if the table turns out to
    // be refreshed asynchronously, a wait on the table content would be
    // needed — verify against the live page.
    this.then(function () {
        saveResultsTable(this, 'crap.png', 'poop.doc');
    });
});

casper.run();
© www.soinside.com 2019 - 2024. All rights reserved.