
I generate a random DataFrame and write it to disk as a parquet file with Spark 2.1. The problem occurs when the number of columns is 100000; with 10000 columns everything works fine. Why does saving to a parquet file with more than 10000 columns lead to a JaninoRuntimeException?

With 100000 columns, Spark prints a long stretch of generated code to the screen and then raises the error shown below.

How can I write to parquet without this error?

import org.apache.spark.sql.types.{StructType, StructField, IntegerType, DoubleType}
import org.apache.spark.ml.Pipeline
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import scala.util.Random
import scala.math

val nRows = 10000
val nCols = 100000

// Each row holds nCols random doubles in (0, 1000], rounded up
val rD = sc.parallelize(0 to nRows - 1).map { _ =>
  Row.fromSeq(Seq.fill(nCols)(math.ceil(1000 * Random.nextDouble())))
}

// Nullable DoubleType columns named C0 .. C99999
val schema = StructType((0 to nCols - 1).map { i => StructField("C" + i, DoubleType, true) })
val df = spark.createDataFrame(rD, schema)

// This write triggers the JaninoRuntimeException shown below
df.select("*").write.format("parquet").save("df.parquet")

/* 379357 */ private void apply_22702(InternalRow i) { 
/* 379358 */ 
/* 379359 */ 
/* 379360 */  boolean isNull90808 = i.isNullAt(90808); 
/* 379361 */  double value90808 = isNull90808 ? -1.0 : (i.getDouble(90808)); 
/* 379362 */  if (isNull90808) { 
/* 379363 */  rowWriter.setNullAt(90808); 
/* 379364 */  } else { 
/* 379365 */  rowWriter.write(90808, value90808); 
/* 379366 */  } 
/* 379367 */ 
/* 379368 */ 
/* 379369 */  boolean isNull90809 = i.isNullAt(90809); 
/* 379370 */  double value90809 = isNull90809 ? -1.0 : (i.getDouble(90809)); 
/* 379371 */  if (isNull90809) { 
/* 379372 */  rowWriter.setNullAt(90809); 
/* 379373 */  } else { 
/* 379374 */  rowWriter.write(90809, value90809); 
/* 379375 */  } 
/* 379376 */ 
/* 379377 */ 
/* 379378 */  boolean isNull90810 = i.isNullAt(90810); 
/* 379379 */  double value90810 = isNull90810 ? -1.0 : (i.getDouble(90810)); 
/* 379380 */  if (isNull90810) { 
/* 379381 */  rowWriter.setNullAt(90810); 
/* 379382 */  } else { 
/* 379383 */  rowWriter.write(90810, value90810); 
/* 379384 */  } 
/* 379385 */ 
. 
. 
. 
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:941) 
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) 
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) 
    at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) 
    at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) 
    at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) 
    ... 25 more 
Caused by: org.codehaus.janino.JaninoRuntimeException: Constant pool for class org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection has grown past JVM limit of 0xFFFF 
    at org.codehaus.janino.util.ClassFile.addToConstantPool(ClassFile.java:499) 
    at org.codehaus.janino.util.ClassFile.addConstantIntegerInfo(ClassFile.java:395) 
    at org.codehaus.janino.UnitCompiler.addConstantIntegerInfo(UnitCompiler.java:11137) 
    at org.codehaus.janino.UnitCompiler.pushConstant(UnitCompiler.java:9681) 
    at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4911) 
    at org.codehaus.janino.UnitCompiler.access$7700(UnitCompiler.java:206) 
    at org.codehaus.janino.UnitCompiler$12.visitIntegerLiteral(UnitCompiler.java:3776) 
    at org.codehaus.janino.UnitCompiler$12.visitIntegerLiteral(UnitCompiler.java:3762) 
    at org.codehaus.janino.Java$IntegerLiteral.accept(Java.java:4635) 
    at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:3762) 
    at org.codehaus.janino.UnitCompiler.fakeCompile(UnitCompiler.java:3128) 
    at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:4927) 
    at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4526) 
    at org.codehaus.janino.UnitCompiler.access$7500(UnitCompiler.java:206) 
    at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:3774) 
    at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:3762) 
    at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:4328) 
    at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:3762) 
    at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:4933) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2330) 
    at org.codehaus.janino.UnitCompiler.access$2600(UnitCompiler.java:206) 
    at org.codehaus.janino.UnitCompiler$6.visitLocalVariableDeclarationStatement(UnitCompiler.java:1386) 
    at org.codehaus.janino.UnitCompiler$6.visitLocalVariableDeclarationStatement(UnitCompiler.java:1370) 
    at org.codehaus.janino.Java$LocalVariableDeclarationStatement.accept(Java.java:2974) 
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) 
    at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) 
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2811) 
    at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262) 
    at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894) 
    at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206) 
    at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377) 
    at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369) 
    at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128) 
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) 
    at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564) 
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:420) 
    at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:206) 
    at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:374) 
    at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:369) 
    at org.codehaus.janino.Java$AbstractPackageMemberClassDeclaration.accept(Java.java:1309) 
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) 
    at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:345) 
    at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:396) 
    at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:311) 
    at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) 
    at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:196) 
    at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:91) 
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:935) 
    ... 30 more 

Answer

It looks like one of those nasty issues where codegen crosses the 64K limit (as reported in SPARK-18492 and SPARK-16845).

You could try one of the nightly 2.2.0-SNAPSHOT builds from the Nightly Packages and Artifacts and check whether the issue has been fixed there (and later, whether it is fixed once that version is released).
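If upgrading is not an option, one possible mitigation (a sketch only, not confirmed to fix this particular error, and assuming the failure is driven by the sheer number of columns handled in a single generated projection) is to split the wide DataFrame into smaller column groups and write each group to its own parquet file. The chunkSize value and the df.parquet_partN output paths below are made up for illustration.

import org.apache.spark.sql.functions.col

// Hypothetical workaround: write the 100000-column DataFrame in column chunks,
// so each write only projects a few thousand columns at a time.
val chunkSize = 5000

df.columns.grouped(chunkSize).zipWithIndex.foreach { case (cols, idx) =>
  df.select(cols.map(col): _*)
    .write
    .format("parquet")
    .save(s"df.parquet_part$idx")
}

Note that each chunk file then holds a disjoint set of columns; if you need to recombine them later, you would have to add an explicit row key column first so the parts can be joined back together.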

Comments:

Did the snapshot fix the problem?

2.2.0 has been released.
