2017-09-19 2 views
1

노드에 대해 더 자세히 알기 위해 node.js 웹 스 크레이퍼를 만들려고합니다. 페이지가 매겨진 페이지가있는 벽에 부딪 혔습니다. 여러 페이지 요청을 처리하도록 권장 된 'async'패키지를 사용했습니다. 나는이 코드를 실행하면웹 스크래퍼가 페이지 매김과 함께 작동하지 않습니다.

, 그것은 테스트 케이스가 true를 돌려주는 경우에도 여러 요청을 반복하지 않습니다 어떤 도움을 크게

var async = require('async'); 
var request = require('request'); 
var cheerio = require('cheerio'); 

var page = 0; 
var options = { 
    url: 'http://www.metacritic.com/browse/movies/title/dvd?page=' + page, 
    headers: { 
     'User-Agent': 'Mozilla Firefox' 
    } 
}; 

var scores = []; 
var titles = []; 
var metaData = []; 
var scoresTitles = {}; 
var pageExists = true; 

async.whilst(
    function(){ 
     return page < 2; 
    }, 
    function(next){ 
     request(options, function(err, res, html) { 
      if(err) { console.log(err) } 

      var $ = cheerio.load(html); 
      console.log('status code:' + res.statusCode); 
      console.log(res.headers); 


      $('td.score_wrapper div.metascore_w').each(function(i, node) { 
       scores.push($(this).text()); 
      }) 

      $('td.title_wrapper div.title a').each(function(i, node) { 
       titles.push($(this).text()); 
      }) 

      for (i = 0; i < scores.length; i++) { 
       scoresTitles = { 
        score: scores[i], 
        title: titles[i] 
       } 
       metaData.push(scoresTitles); 
      } 

      console.log(metaData); 
      if ($('div[class=pad_top1]').text().trim() == 'No movies found.') { 
       pageExists = false; 
      } 
      page++; 
     }) 

     console.log(page); 

    } 
); 

을 감상 할 수 있습니다.

답변

2

난 그냥 내 노트북에 코드를 실행하고 완벽하게 작동하는 것 같다 :

0 
status code:200 
{ 'content-type': 'text/html; charset=UTF-8', 
    'transfer-encoding': 'chunked', 
    connection: 'close', 
    age: '0', 
    'access-control-allow-headers': 'Origin, Authorization, X-Requested-With', 
    'access-control-allow-methods': 'POST, GET, OPTIONS', 
    'set-cookie': 
    [ 'ctk=NTljMWQ4YTM2N2YzMGMxYWRjMWQwZmQ5ZjUyNQ%3D%3D; expires=Mon, 19-Mar-2018 02:55:31 GMT; Max-Age=15552000; path=/; domain=.metacritic.com', 
    'il_geo = %7B%22country%5Fcode%22%3A%22AU%22%2C+%22country%5Fname%22%3A%22Australia%22%2C+%22dma%5Fcode%22%3A%22ZZ%22%2C+%22postal%5Fcode%22%3A%223122%22%7D; path=/; domain=www.metacritic.com; expires=Wed, 27-Sep-17 02:55:31 GMT' ], 
    date: 'Wed, 20 Sep 2017 02:55:46 GMT', 
    'x-varnish': '561413567', 
    'x-instart-request-id': '13619236965216160776:FLQ01-NPPRY16:1505876146:0' } 
[ { score: '42', title: '#Horror' }, 
    { score: '68', title: '$9.99' }, 
    { score: '34', title: '$pent' }, 
    { score: '83', title: '\'71' }, 
    { score: '55', title: '\'R Xmas' }, 
    { score: '76', title: '(500) Days of Summer' }, 
    { score: '60', title: '+1' }, 
    { score: '58', title: '...And They Lived Happily Ever After' }, 
    { score: '65', title: '...So Goes the Nation' }, 
    { score: '57', title: '1,000 Times Good Night' }, 
    { score: '37', title: '10 Cent Pistol' }, 
    { score: '76', title: '10 Cloverfield Lane' }, 
    { score: '54', title: '10 Items or Less' }, 
    { score: '70', title: '10 Things I Hate About You' }, 
    { score: '61', title: '10 Years' }, 
    { score: '34', title: '10,000 BC' }, 
    { score: '75', title: '10,000 km' }, 
    { score: '63', title: '100 Bloody Acres' }, 
    { score: '44', title: '100 Streets' }, 
    { score: '49', title: '101 Dalmatians' }, 
    { score: '35', title: '102 Dalmatians' }, 
    { score: '36', title: '10th & Wolf' }, 
    { score: '71', title: '11 Flowers' }, 
    { score: '65', title: '11:14' }, 
    { score: '96', title: '12 Angry Men' }, 
    { score: '38', title: '12 Rounds' }, 
    { score: '96', title: '12 Years a Slave' }, 
    { score: '82', title: '127 Hours' }, 
    { score: '84', title: '13 Assassins' }, 
    { score: '41', title: '13 Cameras' }, 
    { score: '57', title: '13 Going on 30' }, 
    { score: '48', 
    title: '13 Hours: The Secret Soldiers of Benghazi' }, 
    { score: '61', title: '13 Tzameti' }, 
    { score: '50', title: '14 Blades' }, 
    { score: '64', title: '1408' }, 
    { score: '34', title: '15 Minutes' }, 
    { score: '47', title: '15: The Movie' }, 
    { score: '67', title: '16 Acres' }, 
    { score: '63', title: '16 Blocks' }, 
    { score: '57', title: '16 Years of Alcohol' }, 
    { score: '48', title: '17 Again' }, 
    { score: '37', title: '1911' }, 
    { score: '73', title: '1971' }, 
    { score: '29', title: '1st Night' }, 
    { score: '61', title: '2 Days in New York' }, 
    { score: '67', title: '2 Days in Paris' }, 
    { score: '38', title: '2 Fast 2 Furious' }, 
    { score: '55', title: '2 Guns' }, 
    { score: '58', title: '20 Centimeters' }, 
    { score: '83', title: '20 Feet from Stardom' }, 
    { score: '33', title: '200 Cigarettes' }, 
    { score: '86', title: '2001: A Space Odyssey' }, 
    { score: '40', title: '2009: Lost Memories' }, 
    { score: '49', title: '2012' }, 
    { score: '78', title: '2046' }, 
    { score: '83', title: '20th Century Women' }, 
    { score: '48', title: '21' }, 
    { score: '34', title: '21 and Over' }, 
    { score: '70', title: '21 Grams' }, 
    { score: '69', title: '21 Jump Street' }, 
    { score: '51', title: '21 Years: Richard Linklater' }, 
    { score: '71', title: '22 Jump Street' }, 
    { score: '45', title: '23 Blast' }, 
    { score: '59', title: '24 Days' }, 
    { score: '85', title: '24 Hour Party People' }, 
    { score: '47', title: '24 Hours on Craigslist' }, 
    { score: '67', title: '25th Hour' }, 
    { score: '47', title: '27 Dresses' }, 
    { score: '46', title: '28 Days' }, 
    { score: '73', title: '28 Days Later...' }, 
    { score: '50', title: '28 Hotel Rooms' }, 
    { score: '78', title: '28 Weeks Later' }, 
    { score: '55', title: '3' }, 
    { score: '46', title: '3 Dancing Slaves' }, 
    { score: '40', title: '3 Days to Kill' }, 
    { score: '9', title: '3 Geezers!' }, 
    { score: '47', title: '3 Generations' }, 
    { score: '56', title: '3 Hearts' }, 
    { score: '67', title: '3 Idiots' }, 
    { score: '48', title: '3 Needles' }, 
    { score: '11', title: '3 Strikes' }, 
    { score: '71', title: '3 Women' }, 
    { score: '51', title: '3, 2, 1... Frankie Go Boom' }, 
    { score: '72', title: '3-Iron' }, 
    { score: '53', title: '30 Days of Night' }, 
    { score: '49', title: '30 Minutes or Less' }, 
    { score: '56', title: '30 Years to Life' }, 
    { score: '52', title: '300' }, 
    { score: '21', title: '3000 Miles to Graceland' }, 
    { score: '48', title: '300: Rise of an Empire' }, 
    { score: '35', title: '31' } ] 

사용중인 노드의 버전은 무엇? lts를 사용하고 있다면, async는 아직 지원되지 않습니다. 이 경우 최신 버전으로 전환하는 것을 고려하십시오 (현재 8.5.0).

관련 문제