2013-08-08 3 views
1

코드가 컴파일되지 않습니다. JRE를 1.7로 변경했습니다. Eclipse에서 컴파일러가 클래스를 강조 표시하지 않으며, CrawlConfig에서 컴파일이 실패하는 것처럼 보입니다. 이 클래스는 Linux의 명령 행에서 실행되어야 합니다. 제목: 클래스 CrawlConfig에서 "VariableDeclaratorId" crawler4j 컴파일 오류가 발생합니다.

아이디어가 있으십니까?

컴파일러 오류 - 설명: Syntax error on token "crawlStorageFolder", VariableDeclaratorId expected after this token — 리소스: zeocrawler.java, 경로: /zeowebcrawler/src/main/java/com/example, 위치: 95행, 유형: Java 문제

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.regex.Pattern;

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;

    public class Controller { 

     String crawlStorageFolder = "/data/crawl/root"; 
     int numberOfCrawlers = 7; 

     CrawlConfig config = new CrawlConfig(); 

     config.setCrawlStorageFolder(crawlStorageFolder); 

     PageFetcher pageFetcher = new PageFetcher(config); 
     RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); 
     RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); 
     CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); 

      controller.addSeed("http://www.senym.com"); 
      controller.addSeed("http://www.merrows.co.uk"); 
      controller.addSeed("http://www.zeoic.com"); 


      controller.start(MyCrawler.class, numberOfCrawlers); 








    } 
    /**
     * Opens a {@link URLConnection} to the given address.
     *
     * @param strURL the URL string to connect to
     * @return an open connection, or {@code null} if the URL was malformed
     *         or the connection could not be established
     */
    public URLConnection connectURL(String strURL) {
        URLConnection conn = null;
        try {
            URL inputURL = new URL(strURL);
            conn = inputURL.openConnection();
            // Removed unused local variable `int test = 0;` from the original.
        } catch (MalformedURLException e) {
            System.out.println("Please input a valid URL");
        } catch (IOException ioe) {
            System.out.println("Can not connect to the URL");
        }
        return conn;
    }

    /**
     * Intended to resolve a shortened URL to its original (long) URL by
     * opening a connection and reading the final URL from the response.
     *
     * <p>NOTE(review): the entire implementation below is commented out, so
     * this method is currently a no-op stub. The commented code also calls
     * the instance method {@code connectURL} from a static context, which
     * would not compile if re-enabled as-is.
     */
    public static void updatelongurl()
    {

//  System.out.println("Short URL: "+ shortURL);
//  urlConn = connectURL(shortURL);
//  urlConn.getHeaderFields();
//  System.out.println("Original URL: "+ urlConn.getURL());

/* connectURL - This function will take a valid url and return a
URL object representing the url address. */



    }






    public class MyCrawler extends WebCrawler {

        // static final: the pattern is immutable and thread-safe, so compile it
        // once per class instead of once per crawler instance (the original had
        // a non-static, non-final instance field).
        private static final Pattern FILTERS = Pattern.compile(
                ".*(\\.(css|js|bmp|gif|jpe?g"
                + "|png|tiff?|mid|mp2|mp3|mp4"
                + "|wav|avi|mov|mpeg|ram|m4v|pdf"
                + "|rm|smil|wmv|swf|wma|zip|rar|gz))$");

        /**
         * Decides whether the given URL should be crawled.
         *
         * @param url candidate URL discovered by the crawler
         * @return {@code true} if the URL is not a binary/media resource and
         *         lies under {@code http://www.ics.uci.edu/}
         */
        @Override
        public boolean shouldVisit(WebURL url) {
            String href = url.getURL().toLowerCase();
            return !FILTERS.matcher(href).matches()
                    && href.startsWith("http://www.ics.uci.edu/");
        }

        /**
         * Called when a page has been fetched and is ready to be processed.
         * Logs the URL and, for HTML pages, basic statistics about the
         * extracted text, markup, and outgoing links.
         *
         * @param page the fetched page
         */
        @Override
        public void visit(Page page) {
            String url = page.getWebURL().getURL();
            System.out.println("URL: " + url);

            if (page.getParseData() instanceof HtmlParseData) {
                HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
                String text = htmlParseData.getText();
                String html = htmlParseData.getHtml();
                List<WebURL> links = htmlParseData.getOutgoingUrls();

                System.out.println("Text length: " + text.length());
                System.out.println("Html length: " + html.length());
                System.out.println("Number of outgoing links: " + links.size());
            }
        }
    }

답변

0

코드가 깨끗한 것 같아서 꽤 이상한 오류입니다. 명령 행에서 -clean 옵션을 사용하여 Eclipse를 시작하십시오.

0

변경

String crawlStorageFolder = "/data/crawl/root";

String crawlStorageFolder = "./data/crawl/root";

즉, 경로 문자열 앞에 선행 `.`(마침표)를 추가하라는 뜻입니다. (참고: 이 변경은 경로 해석에만 영향을 주며, 질문의 "VariableDeclaratorId" 구문 오류 자체를 해결하지는 못합니다.)

관련 문제