hadoop에서 jar 파일을 실행하는 중 오류가 발생했습니다.

hadoop에서 jar 파일을 실행하는 동안 널 포인터 예외(NullPointerException)가 발생합니다. 무엇이 문제인지 이해할 수 없습니다.

package mapreduce; 

import java.io.*; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.conf.*; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 
import org.apache.hadoop.util.*; 


public class StockDriver extends Configured implements Tool 
{ 
     public int run(String[] args) throws Exception 
     { 
      //creating a JobConf object and assigning a job name for identification purposes 
      JobConf conf = new JobConf(getConf(), StockDriver.class); 
      conf.setJobName("StockDriver"); 

      //Setting configuration object with the Data Type of output Key and Value 
      conf.setOutputKeyClass(Text.class); 
      conf.setOutputValueClass(IntWritable.class); 

      //Providing the mapper and reducer class names 
      conf.setMapperClass(StockMapper.class); 
      conf.setReducerClass(StockReducer.class); 

      File in = new File(args[0]); 
      int number_of_companies = in.listFiles().length; 
      for(int iter=1;iter<=number_of_companies;iter++) 
      { 
       Path inp = new Path(args[0]+"/i"+Integer.toString(iter)+".txt"); 
       Path out = new Path(args[1]+Integer.toString(iter)); 
       //the HDFS input and output directory to be fetched from the command line 
       FileInputFormat.addInputPath(conf, inp); 
       FileOutputFormat.setOutputPath(conf, out); 
       JobClient.runJob(conf); 
      } 
      return 0; 
     } 

     public static void main(String[] args) throws Exception 
     { 
      int res = ToolRunner.run(new Configuration(), new StockDriver(),args); 
      System.exit(res); 
     } 
}

매퍼 클래스 :

package mapreduce; 

import java.io.IOException; 
import gonn.ConstraintTree; 

import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 

public class StockMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> 
{ 
     //hadoop supported data types 
     private static IntWritable send; 
     private Text word; 

     //map method that performs the tokenizer job and framing the initial key value pairs 
     public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException 
     { 
      //taking one line at a time and tokenizing the same 
      String line = value.toString(); 
      String[] words = line.split(" "); 
      String out = ConstraintTree.isMain(words[1]); 
      word = new Text(out); 

      send = new IntWritable(Integer.parseInt(words[0])); 
      output.collect(word, send); 
     } 
}

리듀서 클래스 :

package mapreduce; 

import java.io.IOException; 
import java.util.Iterator; 

import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 

/**
 * Reducer that adds up all integer values received for a key and emits the
 * key together with that sum.
 */
public class StockReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> 
{ 
     /**
      * Sums every value associated with {@code key} and writes a single (key, sum) pair.
      *
      * @param key      the key shared by all supplied values
      * @param values   iterator over the values emitted for this key
      * @param output   collector receiving the aggregated pair
      * @param reporter progress reporter supplied by the framework (unused)
      * @throws IOException if the collector fails to accept the pair
      */
     public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException 
     { 
      int total = 0; 
      for (Iterator<IntWritable> remaining = values; remaining.hasNext(); ) 
      { 
       IntWritable current = remaining.next(); 
       total = total + current.get(); 
      } 

      IntWritable sum = new IntWritable(total); 
      output.collect(key, sum); 
     } 
}

스택 추적은 다음과 같습니다. (맨 위의 첫 번째 코드는 제 드라이버 클래스입니다.)

Exception in thread "main" java.lang.NullPointerException 
    at mapreduce.StockDriver.run(StockDriver.java:29) 
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) 
    at mapreduce.StockDriver.main(StockDriver.java:44) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:606) 
    at org.apache.hadoop.util.RunJar.main(RunJar.java:212)

`java -jar myfile.jar args...`로 jar 파일을 실행하면 정상적으로 작동합니다. 하지만 hadoop 클러스터에서 `hadoop jar myfile.jar [MainClass] args...`로 실행하면 오류가 발생합니다.

참고로 29번째 줄은 `int number_of_companies = in.listFiles().length;` 입니다.

출처

2014-09-25 Darshil Babel

args[0]의 각 파일에 대해 별도의 MR 작업을 실행하고 있습니까? – blackSmith

@blackSmith 아니요, 각 파일마다 동일한 Mapreduce 작업을 사용하고 있습니다. –

문제의 원인은 명백히 HDFS의 파일을 읽는 데 java.io.File API를 사용한 것입니다. 존재하지 않는 경로로 File 객체를 만들면 listFiles() 메서드는 null을 반환합니다. HDFS 디렉토리에 있는 파일의 수를 구하려면 아래의 `in.listFiles().length` 대신 그 다음에 나오는 FileSystem 코드를 사용하십시오.

in.listFiles().length

// Obtain the file system named by the Hadoop configuration (HDFS on a cluster), not the local disk.
FileSystem fs = FileSystem.get(new Configuration()); 
// The driver's run(String[] args) parameter is "args"; "arg" is undefined there.
int number_of_companies = fs.listStatus(new Path(args[0])).length;

입력 디렉토리가 (제 추정으로는) HDFS에 있어 로컬 파일 시스템에는 존재하지 않으므로, `in.listFiles().length`에서 NPE가 발생하는 것입니다.

출처

2014-09-25 09:35:30 blackSmith

hadoop에서 jar 파일을 실행하는 중 오류가 발생했습니다.

답변

관련 문제