2011-05-05 5 views
0

이 HTML을 simple_html_dom으로 구문 분석하려고 하는데, 페이지 대부분이 주석으로 처리되어 있습니다. (모바일 페이지입니다.)

제목: 모바일 웹 사이트의 HTML 코드 구문 분석

<?php 
// Question's script (the failing attempt): load the Bungie mobile challenges page
// with simple_html_dom and print the title attribute of the #wExpSeconds element.

// Report every error and display it in the output while debugging.
error_reporting(E_ALL); 
ini_set('display_errors', 1); 
// Load the simple_html_dom library from the inc/ directory next to this file.
include(dirname(__FILE__) .'/inc/simple_html_dom.php'); 

$page = 'http://www.bungie.net/mobile/GlobalChallenges.aspx'; 

// Create the parser object and load the remote page into it.
$html = new simple_html_dom(); 
$html->load_file($page); 

// NOTE(review): the page markup shown after this script wraps the whole
// <div id="page"> in an HTML comment, so this selector presumably matches
// nothing in the parsed tree.
// NOTE(review): find() is called without an index here; the answer's version
// passes 0 to get a single element, so $wExp is likely a list rather than a
// node at this point -- confirm against the simple_html_dom documentation.
$wExp = $html->find("#wExpSeconds"); 

// Read the element's title attribute, decode HTML entities, and print it.
$wExp = htmlspecialchars_decode($wExp->attr['title']); 
echo $wExp; 

?> 

....

<!DOCTYPE html> 
<html lang="en"> 
<head><title> 
    Bungie.net 
</title> 

</head> 
<body> 
<!-- 
<div id="page"> 

    <span id="wExpSeconds" title="1304935200000"></span> 
    <span id="dExpMilliseconds" title="1304676000000"></span> 
    <span id="pageClass" class="reach"></span> 
    <div id="mainContent"> 

     <table class="breadcrumb" cellpadding="0" cellspacing="0"> 
      <tr><td colspan="3"><img id="headerPlaceholder2" alt="" src="/images/mobile/blankNavIcon.gif" /></td></tr> 
      <tr> 
       <td><a href="/mobile/ReachOnline.aspx" class="ajax">Back</a></td> 
       <td class="middle"><h3>Challenges</h3></td> 
       <td>&nbsp;</td> 
      </tr> 
      <tr class="spacer"><td colspan="3">&nbsp;</td></tr> 
     </table> 




     <ul class="challengesList"> 

        <li class="weekly"> 
         <h3>Weekly Challenge</h3> 
         <ul> 
          <li> 
           <img id="main_wcr_img_0" title="Alexandria: LASO" src="/images/reachstats/challenges/1.png" alt="Alexandria: LASO" style="border-width:0px;" /> 
           <div class="info"> 
            <h4>Alexandria: LASO</h4> 
            <p>Completion Reward: 16000cR</p> 
            <p class="description">Complete Alexandria, Legendary, All Skulls On (LASO).</p> 
            <p class="time">Time Remaining: <span class="wcd"></span></p> 
           </div> 
           <div class="clear"></div> 
          </li> 
         </ul> 
        </li> 

        <li class="daily"> 
         <h3>Daily Challenges</h3> 
         <ul> 

         <li> 
          <img id="main_dcr_img_0" title="One Spartan Army" src="/images/reachstats/challenges/0.png" alt="One Spartan Army" style="border-width:0px;" /> 
          <div class="info"> 
           <h4>One Spartan Army</h4> 
           <p>Completion Reward: 1875cR</p> 
           <p class="description">Kill 250 enemies in Firefight Matchmaking.</p> 
           <p class="time">Time Remaining: <span class="dcd"></span></p> 
          </div> 
          <div class="clear"></div> 
         </li> 

         <li> 
          <img id="main_dcr_img_1" title="Blast Radius" src="/images/reachstats/challenges/3.png" alt="Blast Radius" style="border-width:0px;" /> 
          <div class="info"> 
           <h4>Blast Radius</h4> 
           <p>Completion Reward: 1000cR</p> 
           <p class="description">Kill 40 enemies in Firefight Matchmaking with grenades.</p> 
           <p class="time">Time Remaining: <span class="dcd"></span></p> 
          </div> 
          <div class="clear"></div> 
         </li> 

         <li> 
          <img id="main_dcr_img_2" title="Hyper Lethal Vector" src="/images/reachstats/challenges/3.png" alt="Hyper Lethal Vector" style="border-width:0px;" /> 
          <div class="info"> 
           <h4>Hyper Lethal Vector</h4> 
           <p>Completion Reward: 1125cR</p> 
           <p class="description">Kill 150 enemies with headshots in Firefight Matchmaking.</p> 
           <p class="time">Time Remaining: <span class="dcd"></span></p> 
          </div> 
          <div class="clear"></div> 
         </li> 

         <li> 
          <img id="main_dcr_img_3" title="Be Their Huckleberry" src="/images/reachstats/challenges/3.png" alt="Be Their Huckleberry" style="border-width:0px;" /> 
          <div class="info"> 
           <h4>Be Their Huckleberry</h4> 
           <p>Completion Reward: 1250cR</p> 
           <p class="description">Kill 100 enemies in Firefight Matchmaking with the pistol.</p> 
           <p class="time">Time Remaining: <span class="dcd"></span></p> 
          </div> 
          <div class="clear"></div> 
         </li> 

         </ul> 
        </li> 

     </ul> 

    </div> 

    <div id="footer"> 

     <p>&copy; 2011 Bungie, Inc. All rights reserved.</p> 
     <p class="footer_links"> 
      <a id="mLink" href="/mobile/dl/globalchallenges.aspx">mobile</a> 
      <a id="fsLink" href="/mobile/redirect.aspx?path=%2fstats%2freach%2fglobalchallenges.aspx">full site</a> 
     </p> 
    </div> 
</div> 
--> 
</body> 
</html> 

답변

0
<?php 
    // Fetch the Bungie mobile challenges page, strip the HTML comment markers
    // that hide its markup, and print the title attribute of #wExpSeconds.

    // str_get_html() comes from simple_html_dom; load it from the same path the
    // question's script uses so this snippet runs on its own.
    require_once dirname(__FILE__) . '/inc/simple_html_dom.php';

    $page = 'http://www.bungie.net/mobile/GlobalChallenges.aspx'; 

    // read page into a string; file_get_contents() returns false on failure
    $page_str = file_get_contents($page); 
    if ($page_str === false) {
        exit('Could not download ' . $page);
    }

    // remove HTML comment tags so the commented-out markup becomes real markup
    $page_str = str_replace(array('<!--', '-->'), '', $page_str); 

    // create SimpleHTMLDom object with the string
    // (stop if the library did not return a usable object, e.g. for an empty page)
    $html = str_get_html($page_str); 
    if (!$html) {
        exit('Could not parse the downloaded page');
    }

    // only look for the first occurrence; stop if the element or its title is missing
    $wExp = $html->find("#wExpSeconds", 0); 
    if (!$wExp || !isset($wExp->attr['title'])) {
        exit('Element #wExpSeconds with a title attribute was not found');
    }
    $wExp = htmlspecialchars_decode($wExp->attr['title']); 

    echo $wExp; 
?> 

출력 :

1304935200000