웹 사이트를 스크랩하려하지만 항상 Empty Reply from server
중 하나가 code
을보고 무엇이 잘못 되었습니까? 여기 curl error HTTP 상태 코드 0 - 서버의 빈 응답
Array ([url] => http://www.delivery-club.ru/ [content_type] => text/html [http_code] => 301 [header_size] => 196 [request_size] => 840 [filetime] => -1 [ssl_verify_result] => 0 [redirect_count] => 1 [total_time] => 61.359 [namelookup_time] => 0 [connect_time] => 0.281 [pretransfer_time] => 0.281 [size_upload] => 0 [size_download] => 0 [speed_download] => 0 [speed_upload] => 0 [download_content_length] => 178 [upload_content_length] => 0 [starttransfer_time] => 60.593 [redirect_time] => 0.766 [certinfo] => Array () [redirect_url] =>) Empty reply from server
여기 code
function spider($url){
$header = array(
"Host" => "www.delivery-club.ru",
//"Accept-Encoding:gzip,deflate,sdch",
"Accept-Language:en-US,en;q=0.8",
"Cache-Control:max-age=0",
"Connection:keep-alive","Content-Length:725","Content-Type:application/x-www-form-urlencoded",
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
,"X-Requested-With:XMLHttpRequest"
);
$cookie = "cookie.txt";
$ch = curl_init();
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0); // return headers 0 no 1 yes
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return page 1:yes
curl_setopt($ch, CURLOPT_TIMEOUT, 200); // http request time-out 20 seconds
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // Follow redirects, need this if the URL changes
curl_setopt($ch, CURLOPT_MAXREDIRS, 2); //if http server gives redirection response
curl_setopt($ch, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7");
curl_setopt($ch, CURLOPT_COOKIEJAR, realpath($cookie)); // cookies storage/here the changes have been made
curl_setopt($ch, CURLOPT_COOKIEFILE, realpath($cookie));
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // false for https
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS,"view=ViewDistrict¶m=7&uniqueid=1397991494188&PHPSESSID=f134vrnv7glosgojvf4n1mp7o2&page=http%3A%2F%2Fwww.immosuchmaschine.at%2Fxhr.php");
curl_setopt($ch, CURLOPT_REFERER, $url);
curl_setopt($ch, CURLOPT_REFERER, "http://www.immosuchmaschine.at/");
$data = curl_exec($ch); // execute the http request
$info = curl_getinfo($ch);
curl_close($ch); // close the connection
return $data;
}
입니다 function call
echo spider("http://www.delivery-club.ru/");
입니다 지금 curl_setopt($ch, CURLOPT_POSTFIELDS,"view=ViewDistrict¶m=7&uniqueid=".time(). rand(101,500)."&PHPSESSID=f134vrnv7glosgojvf4n1mp7o2&page=http%3A%2F%2Fwww.immosuchmaschine.at%2Fxhr.php");
하고 헤더에서이를 제거하는 시도 해 봤나 또한 headers
게시 값은 계속 유효합니까? 2 개 이상의 redir가 있습니까? 서버가 차단 되었습니까? 반환되는 헤더는 무엇입니까? –
예 이것들은 유효하고 아니요'redirect'가 없습니다 – Mubin
헤더로 질문을 업데이트 할 것입니다. – Mubin