0
sparkContext.addFile 메소드를 사용하여 각 실행 프로그램(executor)에 큰 파일을 전달하려고 합니다. 그런데 이 큰 파일에 대해 SparkContext.addFile이 FileNotFoundException을 발생시킵니다.
파일의 소스는 아마존 S3입니다 (참고: 소스가 HDFS인 경우에는 모든 것이 잘 작동합니다).
오류를 일으키는 코드:
val context = stream.context.sparkContext
context.addFile("s3n://bucket-name/file-path")
...
SparkFiles.get(file-name)
발생하는 예외 스택 트레이스:
java.io.FileNotFoundException: File s3n://bucket-name/file-path does not exist.
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.listStatus(S3NativeFileSystem.java:945)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.listStatus(S3NativeFileSystem.java:887)
at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.listStatus(EmrFileSystem.java:343)
at org.apache.spark.util.Utils$.fetchHcfsFile(Utils.scala:596)
at org.apache.spark.util.Utils$.doFetchFile(Utils.scala:566)
at org.apache.spark.util.Utils$.fetchFile(Utils.scala:356)
at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$3.apply(Executor.scala:393)
at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$3.apply(Executor.scala:390)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.executor.Executor.org$apache$spark$executor$Executor$$updateDependencies(Executor.scala:390)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:193)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
그러나 "fs -ls" 명령을 실행하면 해당 파일에 정상적으로 접근할 수 있습니다.
그 이유는 무엇일까요?
PS : 스파크 버전 : 1.5.2