코드가 컴파일되지 않습니다. JRE를 1.7로 변경했습니다. Eclipse에서 컴파일러가 클래스를 강조 표시하지 않으며, CrawlConfig에서 컴파일이 실패하는 것으로 보입니다. 이 클래스는 Linux의 명령줄에서 실행되어야 합니다. CrawlConfig 클래스에서 crawler4j 컴파일 오류(VariableDeclaratorId 예상)가 발생합니다.
아이디어가 있으십니까?
컴파일러 오류 (설명 / 리소스 / 경로 / 위치 / 유형): 토큰 "crawlStorageFolder"에서 구문 오류, 이 토큰 뒤에 VariableDeclaratorId가 필요합니다 / zeocrawler.java / /zeowebcrawler/src/main/java/com/example / 95행 / Java 문제
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
public class Controller {
String crawlStorageFolder = "/data/crawl/root";
int numberOfCrawlers = 7;
CrawlConfig config = new CrawlConfig();
config.setCrawlStorageFolder(crawlStorageFolder);
PageFetcher pageFetcher = new PageFetcher(config);
RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
controller.addSeed("http://www.senym.com");
controller.addSeed("http://www.merrows.co.uk");
controller.addSeed("http://www.zeoic.com");
controller.start(MyCrawler.class, numberOfCrawlers);
}
public URLConnection connectURL(String strURL) {
URLConnection conn =null;
try {
URL inputURL = new URL(strURL);
conn = inputURL.openConnection();
int test = 0;
}catch(MalformedURLException e) {
System.out.println("Please input a valid URL");
}catch(IOException ioe) {
System.out.println("Can not connect to the URL");
}
return conn;
}
public static void updatelongurl()
{
// System.out.println("Short URL: "+ shortURL);
// urlConn = connectURL(shortURL);
// urlConn.getHeaderFields();
// System.out.println("Original URL: "+ urlConn.getURL());
/* connectURL - This function will take a valid url and return a
URL object representing the url address. */
}
public class MyCrawler extends WebCrawler {
private Pattern FILTERS = Pattern.compile(".*(\\.(css|js|bmp|gif|jpe?g"
+ "|png|tiff?|mid|mp2|mp3|mp4"
+ "|wav|avi|mov|mpeg|ram|m4v|pdf"
+ "|rm|smil|wmv|swf|wma|zip|rar|gz))$");
/**
* You should implement this function to specify whether
* the given url should be crawled or not (based on your
* crawling logic).
*/
@Override
public boolean shouldVisit(WebURL url) {
String href = url.getURL().toLowerCase();
return !FILTERS.matcher(href).matches() && href.startsWith("http://www.ics.uci.edu/");
}
/**
* This function is called when a page is fetched and ready
* to be processed by your program.
*/
@Override
public void visit(Page page) {
String url = page.getWebURL().getURL();
System.out.println("URL: " + url);
if (page.getParseData() instanceof HtmlParseData) {
HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
String text = htmlParseData.getText();
String html = htmlParseData.getHtml();
List<WebURL> links = htmlParseData.getOutgoingUrls();
System.out.println("Text length: " + text.length());
System.out.println("Html length: " + html.length());
System.out.println("Number of outgoing links: " + links.size());
}
}
}