2013-07-07 4 views
1

30,000,000개가 넘는 페이지에 대한 사이트맵을 만들고 싶습니다. 페이지는 매일 업데이트되며, 기존 페이지가 삭제되고 새 페이지가 추가됩니다. 큰 웹 사이트의 PHP 사이트맵

cron 작업으로 실행하고 싶은 다음 PHP 스크립트를 찾았습니다.

Sitemap php script

모든 URI는 "myuri" 테이블의 "uri" 열에 저장되어 있으며, 각 항목은 예를 들어 "/this-is-a-page.html"과 같은 형식으로 기록되어 있습니다. 이 스크립트가 제 테이블을 사용하도록 하려면 어떤 매개 변수를 추가해야 합니까?

<?php 

/* 
* author:  Kyle Gadd 
* documentation: http://www.php-ease.com/classes/sitemap.html 
* 
* This program is free software: you can redistribute it and/or modify 
* it under the terms of the GNU General Public License as published by 
* the Free Software Foundation, either version 3 of the License, or 
* (at your option) any later version. 
* 
* This program is distributed in the hope that it will be useful, 
* but WITHOUT ANY WARRANTY; without even the implied warranty of 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
* GNU General Public License for more details. 
* 
* You should have received a copy of the GNU General Public License 
* along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ 

/**
 * Writes sitemap XML files (optionally gzip-compressed) and keeps a
 * sitemap index file up to date.
 *
 * URLs added through url() are buffered in memory and flushed to
 * "sitemap-{page}-{n}.xml[.gz]" once MAX_URLS entries have accumulated, when
 * page() switches to another name, on close(), and on destruction. Every
 * flush also rewrites "sitemap-index.xml[.gz]".
 *
 * The class reads two constants that are not defined in this file:
 *   BASE_URL - prefix prepended to every URL and to sitemap names in the index
 *   BASE_URI - filesystem prefix the generated files are written under
 */
class Sitemap {

    /** Number of <url> entries after which the buffer is flushed to a file. */
    const MAX_URLS = 50000;

    private $compress;        // '.gz' when output is gzip-compressed, '' otherwise
    private $page = 'index';  // name used in "sitemap-{page}-{n}.xml"
    private $index = 1;       // number {n} of the next file written for the current page
    private $count = 1;       // 1-based position of the next URL within the current file
    private $urls = array();  // buffered entries: a string (loc only) or an array of tag => value

    /**
     * @param bool $compress Write gzip-compressed ".xml.gz" files when true.
     */
    public function __construct ($compress=true) {
        ini_set('memory_limit', '75M'); // 50M required per tests
        $this->compress = ($compress) ? '.gz' : '';
    }

    /**
     * Flushes buffered URLs and starts a new group of files named after $name.
     *
     * @param string $name Page name used in the generated file names.
     */
    public function page ($name) {
        $this->save();
        $this->page = $name;
        $this->index = 1;
    }

    /**
     * Buffers one URL, flushing to disk once MAX_URLS entries are buffered.
     *
     * Optional values that are empty or invalid are left out of the entry
     * instead of being written with a bogus value.
     *
     * @param string $url        Path appended to BASE_URL.
     * @param string $lastmod    Any date string strtotime() understands.
     * @param string $changefreq always|hourly|daily|weekly|monthly|yearly|never (case-insensitive).
     * @param mixed  $priority   Numeric value whose absolute value is at most 1; rounded to one decimal.
     */
    public function url ($url, $lastmod='', $changefreq='', $priority='') {
        $loc = htmlspecialchars(BASE_URL . $url);

        // An unparsable date makes strtotime() return false; skip the tag
        // rather than letting date() turn that into 1970-01-01.
        $date = false;
        if (!empty($lastmod)) {
            $timestamp = strtotime($lastmod);
            if ($timestamp !== false) $date = date('Y-m-d', $timestamp);
        }

        $freq = false;
        if (!empty($changefreq)) {
            $candidate = strtolower($changefreq);
            $allowed = array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never');
            if (in_array($candidate, $allowed, true)) $freq = $candidate;
        }

        // is_numeric() rejects '', null and false but accepts 0 / "0", which
        // is a valid priority. number_format() keeps the output locale-independent.
        $prio = false;
        if (is_numeric($priority) && abs($priority) <= 1) {
            $prio = number_format(round(abs($priority), 1), 1, '.', '');
        }

        if ($date === false && $freq === false && $prio === false) {
            $this->urls[] = $loc;
        } else {
            $entry = array('loc' => $loc);
            if ($date !== false) $entry['lastmod'] = $date;
            if ($freq !== false) $entry['changefreq'] = $freq;
            if ($prio !== false) $entry['priority'] = $prio;
            $this->urls[] = $entry;
        }

        if ($this->count == self::MAX_URLS) {
            $this->save();
        } else {
            $this->count++;
        }
    }

    /**
     * Flushes any buffered URLs and pings the search engines.
     */
    public function close() {
        $this->save();
        $this->ping_search_engines();
    }

    /**
     * Writes the buffered URLs to the next sitemap file of the current page,
     * deletes higher-numbered files of the same page, and registers the file
     * in the index. Does nothing when the buffer is empty.
     */
    private function save() {
        if (empty($this->urls)) return;
        $file = "sitemap-{$this->page}-{$this->index}.xml{$this->compress}";
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        foreach ($this->urls as $url) {
            $xml .= ' <url>' . "\n";
            if (is_array($url)) {
                foreach ($url as $key => $value) $xml .= " <{$key}>{$value}</{$key}>\n";
            } else {
                $xml .= " <loc>{$url}</loc>\n";
            }
            $xml .= ' </url>' . "\n";
        }
        $xml .= '</urlset>' . "\n";
        $this->urls = array();
        $written = $this->write($file, $xml);
        $this->index++;
        $this->count = 1;
        // Files numbered from the new index upwards were not produced by this
        // run so far; remove them.
        $num = $this->index;
        while (file_exists(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}")) {
            unlink(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}");
            $num++;
        }
        // Do not advertise a file that could not be written.
        if ($written) $this->index($file);
    }

    /**
     * Rewrites the sitemap index: keeps existing entries whose file still
     * exists and sets today's date for $file.
     *
     * @param string $file Name (relative to BASE_URI) of the file just written.
     */
    private function index ($file) {
        $sitemaps = array();
        $index = "sitemap-index.xml{$this->compress}";
        if (file_exists(BASE_URI . $index)) {
            $lines = (!empty($this->compress)) ? gzfile(BASE_URI . $index) : file(BASE_URI . $index);
            if ($lines === false) $lines = array(); // unreadable index: rebuild from scratch
            foreach ($this->xml_tag(implode('', $lines), array('sitemap')) as $entry) {
                $loc = $this->xml_tag($entry, 'loc');
                if ($loc === false) continue;
                $loc = str_replace(BASE_URL, '', $loc);
                // An empty name would make file_exists() test BASE_URI itself.
                if ($loc === '' || !file_exists(BASE_URI . $loc)) continue;
                $lastmod = $this->xml_tag($entry, 'lastmod');
                $timestamp = ($lastmod) ? strtotime($lastmod) : false;
                $sitemaps[$loc] = ($timestamp !== false) ? date('Y-m-d', $timestamp) : date('Y-m-d');
            }
        }
        $sitemaps[$file] = date('Y-m-d');
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        foreach ($sitemaps as $loc => $lastmod) {
            $xml .= ' <sitemap>' . "\n";
            $xml .= ' <loc>' . BASE_URL . $loc . '</loc>' . "\n";
            $xml .= ' <lastmod>' . $lastmod . '</lastmod>' . "\n";
            $xml .= ' </sitemap>' . "\n";
        }
        $xml .= '</sitemapindex>' . "\n";
        $this->write($index, $xml);
    }

    /**
     * Writes $xml to BASE_URI . $file, gzip-encoding it first when compression
     * is enabled. Raises an E_USER_WARNING instead of aborting on failure.
     *
     * @param string $file File name relative to BASE_URI.
     * @param string $xml  Uncompressed XML document.
     * @return bool True when the file was written.
     */
    private function write ($file, $xml) {
        if (!empty($this->compress)) $xml = gzencode($xml, 9);
        if (file_put_contents(BASE_URI . $file, $xml) === false) {
            trigger_error('Unable to write sitemap file ' . BASE_URI . $file, E_USER_WARNING);
            return false;
        }
        return true;
    }

    /**
     * Minimal tag extractor for the index file this class writes itself.
     *
     * With a string $tag it returns the text between the first "<tag>" and the
     * following "</tag>", or false when either is missing. With an array it
     * returns a list with the content of every consecutive occurrence of
     * $tag[0].
     *
     * @param string       $xml Text to search.
     * @param string|array $tag Tag name, or array whose first item is the tag name.
     * @param int|string   $end Receives the offset just past the closing tag.
     * @return string|array|false
     */
    private function xml_tag ($xml, $tag, &$end='') {
        if (is_array($tag)) {
            $tags = array();
            // Compare against false so an empty element does not end the scan.
            while (($value = $this->xml_tag($xml, $tag[0], $end)) !== false) {
                $tags[] = $value;
                $xml = substr($xml, $end);
            }
            return $tags;
        }
        $open = "<{$tag}>";
        $close = "</{$tag}>";
        $pos = strpos($xml, $open);
        if ($pos === false) return false;
        $start = $pos + strlen($open);
        $stop = strpos($xml, $close, $start);
        if ($stop === false) return false; // unterminated element
        $end = $stop + strlen($close);
        return substr($xml, $start, $stop - $start);
    }

    /**
     * Sends a HEAD request announcing the sitemap index to each search engine
     * and raises a notice for every response whose status code is not 200.
     *
     * NOTE(review): these ping endpoints date from the original script; some
     * search engines have since retired sitemap pings - verify each endpoint
     * before relying on this.
     */
    public function ping_search_engines() {
        $sitemap = BASE_URL . 'sitemap-index.xml' . $this->compress;
        $engines = array();
        $engines['www.google.com'] = '/webmasters/tools/ping?sitemap=' . urlencode($sitemap);
        $engines['www.bing.com'] = '/webmaster/ping.aspx?siteMap=' . urlencode($sitemap);
        $engines['submissions.ask.com'] = '/ping?sitemap=' . urlencode($sitemap);
        foreach ($engines as $host => $path) {
            $fp = fsockopen($host, 80);
            if (!$fp) continue; // fsockopen() has already emitted its own warning
            $send = "HEAD $path HTTP/1.1\r\n";
            $send .= "HOST: $host\r\n";
            $send .= "CONNECTION: Close\r\n\r\n";
            fwrite($fp, $send);
            $http_response = fgets($fp, 128);
            fclose($fp);
            // The status line looks like "HTTP/1.1 200 OK"; tolerate an empty
            // or truncated reply instead of reading undefined offsets.
            $parts = ($http_response !== false) ? explode(' ', $http_response) : array();
            $response = isset($parts[0]) ? $parts[0] : '';
            $code = isset($parts[1]) ? $parts[1] : '';
            if ($code != 200) trigger_error ("{$host} ping was unsuccessful.<br />Code: {$code}<br />Response: {$response}");
        }
    }

    /**
     * Flushes whatever is still buffered when the object goes away.
     */
    public function __destruct() {
        $this->save();
    }

}

?> 

해당 페이지에는 다음과 같은 사용 예가 이미 있습니다:

<?php 

require_once ('php/classes/Sitemap.php'); 

// Generate one group of sitemap files per content type requested through
// the "update" query parameter (see get()).
$sitemap = new Sitemap();

if (get('pages')) {
    $sitemap->page('pages');
    $rows = db_query ("SELECT url, created FROM pages"); // 20 pages
    // fetch_row() yields a numerically indexed row: [0] => url, [1] => created.
    while ($row = $rows->fetch_row()) {
        $sitemap->url($row[0], $row[1], 'yearly');
    }
}

if (get('posts')) {
    $sitemap->page('posts');
    $rows = db_query ("SELECT url, updated FROM posts"); // 70,000 posts
    // Same row layout: [0] => url, [1] => updated.
    while ($row = $rows->fetch_row()) {
        $sitemap->url($row[0], $row[1], 'monthly');
    }
}

// Flush the remaining URLs and notify the search engines, then release the
// object.
$sitemap->close();
unset ($sitemap);

/**
 * Tells whether the "update" query-string parameter mentions $name.
 *
 * The parameter is matched as a substring, so "?update=pages,posts" selects
 * both 'pages' and 'posts'. A missing parameter, or one submitted as an
 * array ("?update[]=pages"), selects nothing.
 *
 * @param string $name Section name to look for.
 * @return bool True when $name occurs in $_GET['update'].
 */
function get ($name) {
    // strpos() needs a string haystack; an array value would otherwise raise
    // a TypeError on PHP 8 or be reported as a match on older versions.
    if (!isset($_GET['update']) || !is_string($_GET['update'])) {
        return false;
    }
    return strpos($_GET['update'], $name) !== false;
}

?> 

답변

0

저라면 이 부분을 다음과 같이 바꾸겠습니다 ....

// Feed every URI stored in the "myuri" table into the 'pages' sitemap.
// NOTE(review): mysql_fetch_row() belongs to the ext/mysql API, which was
// removed in PHP 7.0. If db_query() returns a mysqli result object, iterate
// with $rows->fetch_row() instead - confirm against db_query().
if (get('pages')) {
    $sitemap->page('pages');
    $rows = db_query ("SELECT uri FROM myuri");
    while ($row = mysql_fetch_row($rows)) {
        // Only the URI is known, so lastmod is passed as an empty string.
        $sitemap->url($row[0], '', 'yearly');
    }
}

$updated 값이 필요하지 않다면 빼도 됩니다. 어차피 함수가 빈 문자열을 기본값으로 사용하는 것으로 보입니다. 다만 테이블에 타임스탬프 열이 있다면 마지막으로 업데이트된 날짜를 함께 가져와서, 위 코드에서 '' 를 넣은 자리에 전달할 수도 있습니다.

또한 .... 이 부분은 제거하십시오 ...

// Quoted from the usage example: adds every row of the "posts" table to a
// 'posts' sitemap with a monthly change frequency.
if (get('posts')) { 
$sitemap->page('posts'); 
$result = db_query ("SELECT url, updated FROM posts"); // 70,000 posts 
// Each fetched row is unpacked into $url and $updated; the loop ends when
// fetch_row() stops returning a row.
while (list($url, $updated) = $result->fetch_row()) { 
$sitemap->url($url, $updated, 'monthly'); 
} 
} 
+0

지금까지 도와주셔서 감사합니다. 그런데 아직 작동하지 않습니다 ... 현재 상태는 이렇습니다 – VolkaRacho

+0

새 게시물을 작성하지 말고 다음과 같이 변경하세요: while (list($url, $created) = mysql_fetch_row($result)) – KyleK

+0

이제 아주 완벽하게 작동합니다! 고마워요 :) – VolkaRacho

관련 문제