Skip to content

Commit 707933f

Browse files
committed
Reduce classForName calls to improve registrator performance.
1 parent 563d6aa commit 707933f

File tree

1 file changed

+62
-96
lines changed

1 file changed

+62
-96
lines changed

adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala

+62-96
Original file line numberDiff line numberDiff line change
@@ -94,18 +94,9 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
9494

9595
override def registerClasses(kryo: Kryo) {
9696

97-
def registerByName(kryo: Kryo, name: String) {
98-
try {
99-
kryo.register(Class.forName(name))
100-
} catch {
101-
case cnfe: java.lang.ClassNotFoundException => {
102-
debug("Could not register class %s by name".format(name))
103-
}
104-
}
105-
}
106-
10797
// Register Avro classes using fully qualified class names
10898
// Sort alphabetically and add blank lines between packages
99+
// Classes that require Class.forName list below in forNameClasses
109100

110101
// htsjdk.samtools
111102
kryo.register(classOf[htsjdk.samtools.CigarElement])
@@ -116,8 +107,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
116107
kryo.register(classOf[htsjdk.samtools.SAMSequenceDictionary])
117108
kryo.register(classOf[htsjdk.samtools.SAMFileHeader])
118109
kryo.register(classOf[htsjdk.samtools.SAMSequenceRecord])
119-
registerByName(kryo, "htsjdk.samtools.SAMFileHeader$GroupOrder")
120-
registerByName(kryo, "htsjdk.samtools.SAMFileHeader$SortOrder")
121110

122111
// htsjdk.variant.vcf
123112
kryo.register(classOf[htsjdk.variant.vcf.VCFContigHeaderLine])
@@ -128,7 +117,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
128117
kryo.register(classOf[htsjdk.variant.vcf.VCFHeaderLine])
129118
kryo.register(classOf[htsjdk.variant.vcf.VCFHeaderLineCount])
130119
kryo.register(classOf[htsjdk.variant.vcf.VCFHeaderLineType])
131-
registerByName(kryo, "htsjdk.variant.vcf.VCFCompoundHeaderLine$SupportedHeaderLineType")
132120

133121
// java.lang
134122
kryo.register(classOf[java.lang.Class[_]])
@@ -140,23 +128,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
140128
kryo.register(classOf[java.util.HashMap[_, _]])
141129
kryo.register(classOf[java.util.HashSet[_]])
142130

143-
// org.apache.avro
144-
registerByName(kryo, "org.apache.avro.Schema$RecordSchema")
145-
registerByName(kryo, "org.apache.avro.Schema$Field")
146-
registerByName(kryo, "org.apache.avro.Schema$Field$Order")
147-
registerByName(kryo, "org.apache.avro.Schema$UnionSchema")
148-
registerByName(kryo, "org.apache.avro.Schema$Type")
149-
registerByName(kryo, "org.apache.avro.Schema$LockableArrayList")
150-
registerByName(kryo, "org.apache.avro.Schema$BooleanSchema")
151-
registerByName(kryo, "org.apache.avro.Schema$NullSchema")
152-
registerByName(kryo, "org.apache.avro.Schema$StringSchema")
153-
registerByName(kryo, "org.apache.avro.Schema$IntSchema")
154-
registerByName(kryo, "org.apache.avro.Schema$FloatSchema")
155-
registerByName(kryo, "org.apache.avro.Schema$EnumSchema")
156-
registerByName(kryo, "org.apache.avro.Schema$Name")
157-
registerByName(kryo, "org.apache.avro.Schema$LongSchema")
158-
registerByName(kryo, "org.apache.avro.generic.GenericData$Array")
159-
160131
// org.apache.hadoop.conf
161132
kryo.register(classOf[org.apache.hadoop.conf.Configuration],
162133
new WritableSerializer[org.apache.hadoop.conf.Configuration])
@@ -291,23 +262,10 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
291262
kryo.register(classOf[org.bdgenomics.formats.avro.VariantCallingAnnotations],
292263
new AvroSerializer[org.bdgenomics.formats.avro.VariantCallingAnnotations])
293264

294-
// org.apache.spark.internal
295-
registerByName(kryo, "org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage")
296-
297265
// org.apache.spark.catalyst
298266
kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.UnsafeRow])
299267

300268
// org.apache.spark.sql
301-
registerByName(kryo, "org.apache.spark.sql.execution.datasources.FileFormatWriter$WriteTaskResult")
302-
registerByName(kryo, "org.apache.spark.sql.execution.datasources.BasicWriteTaskStats")
303-
registerByName(kryo, "org.apache.spark.sql.execution.datasources.ExecutedWriteSummary")
304-
registerByName(kryo, "org.apache.spark.sql.execution.datasources.WriteTaskResult")
305-
registerByName(kryo, "org.apache.spark.sql.types.BooleanType$")
306-
registerByName(kryo, "org.apache.spark.sql.types.DoubleType$")
307-
registerByName(kryo, "org.apache.spark.sql.types.FloatType$")
308-
registerByName(kryo, "org.apache.spark.sql.types.IntegerType$")
309-
registerByName(kryo, "org.apache.spark.sql.types.LongType$")
310-
registerByName(kryo, "org.apache.spark.sql.types.StringType$")
311269
kryo.register(classOf[org.apache.spark.sql.types.ArrayType])
312270
kryo.register(classOf[org.apache.spark.sql.types.MapType])
313271
kryo.register(classOf[org.apache.spark.sql.types.Metadata])
@@ -354,26 +312,14 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
354312
kryo.register(classOf[scala.Array[Long]])
355313
kryo.register(classOf[scala.Array[String]])
356314
kryo.register(classOf[scala.Array[Option[_]]])
357-
registerByName(kryo, "scala.Tuple2$mcCC$sp")
358-
359-
// scala.collection
360-
registerByName(kryo, "scala.collection.Iterator$$anon$11")
361-
registerByName(kryo, "scala.collection.Iterator$$anonfun$toStream$1")
362-
363-
// scala.collection.convert
364-
registerByName(kryo, "scala.collection.convert.Wrappers$")
365315

366316
// scala.collection.immutable
367317
kryo.register(classOf[scala.collection.immutable.::[_]])
368318
kryo.register(classOf[scala.collection.immutable.Range])
369-
registerByName(kryo, "scala.collection.immutable.Stream$Cons")
370-
registerByName(kryo, "scala.collection.immutable.Stream$Empty$")
371-
registerByName(kryo, "scala.collection.immutable.Set$EmptySet$")
372319

373320
// scala.collection.mutable
374321
kryo.register(classOf[scala.collection.mutable.ArrayBuffer[_]])
375322
kryo.register(classOf[scala.collection.mutable.ListBuffer[_]])
376-
registerByName(kryo, "scala.collection.mutable.ListBuffer$$anon$1")
377323
kryo.register(classOf[scala.collection.mutable.WrappedArray.ofInt])
378324
kryo.register(classOf[scala.collection.mutable.WrappedArray.ofLong])
379325
kryo.register(classOf[scala.collection.mutable.WrappedArray.ofByte])
@@ -383,47 +329,67 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
383329
// scala.math
384330
kryo.register(scala.math.Numeric.LongIsIntegral.getClass)
385331

386-
// scala.reflect
387-
registerByName(kryo, "scala.reflect.ClassTag$GenericClassTag")
388-
389-
// This seems to be necessary when serializing a RangePartitioner, which writes out a ClassTag:
390-
//
391-
// https://github.com/apache/spark/blob/v1.5.2/core/src/main/scala/org/apache/spark/Partitioner.scala#L220
392-
//
393-
// See also:
394-
//
395-
// https://mail-archives.apache.org/mod_mbox/spark-user/201504.mbox/%3CCAC95X6JgXQ3neXF6otj6a+F_MwJ9jbj9P-Ssw3Oqkf518_eT1w@mail.gmail.com%3E
396-
registerByName(kryo, "scala.reflect.ClassTag$$anon$1")
397-
398-
// needed for manifests
399-
registerByName(kryo, "scala.reflect.ManifestFactory$ClassTypeManifest")
400-
401-
// Added to Spark in 1.6.0; needed here for Spark < 1.6.0.
402-
kryo.register(classOf[Array[Tuple1[Any]]])
403-
kryo.register(classOf[Array[(Any, Any)]])
404-
kryo.register(classOf[Array[(Any, Any, Any)]])
405-
kryo.register(classOf[Array[(Any, Any, Any, Any)]])
406-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any)]])
407-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any)]])
408-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any)]])
409-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any)]])
410-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
411-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
412-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
413-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
414-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
415-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
416-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
417-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
418-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
419-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
420-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
421-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
422-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
423-
kryo.register(classOf[Array[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any)]])
424-
425332
kryo.register(Map.empty.getClass)
426-
kryo.register(Nil.getClass)
427-
kryo.register(None.getClass)
333+
334+
ADAMKryoRegistrator.forNameClasses.foreach { clazz =>
335+
try {
336+
kryo.register(clazz)
337+
} catch {
338+
case _: Throwable => // do nothing
339+
}
340+
}
341+
}
342+
}
343+
344+
private[serialization] object ADAMKryoRegistrator {

  import scala.util.control.NonFatal

  /**
   * Classes that cannot be referenced statically (private/synthetic inner
   * classes, module classes, version-specific Spark and Scala internals)
   * and therefore must be resolved reflectively by name.
   *
   * Resolved once, lazily, so the cost of Class.forName is paid a single
   * time rather than on every registrator instantiation (the motivation of
   * this change). Names that are absent on the current classpath are
   * silently dropped, matching the best-effort behavior of the old
   * per-instance registerByName helper.
   */
  private lazy val forNameClasses: Seq[Class[_]] = {
    Seq(
      "htsjdk.samtools.SAMFileHeader$GroupOrder",
      "htsjdk.samtools.SAMFileHeader$SortOrder",

      "htsjdk.variant.vcf.VCFCompoundHeaderLine$SupportedHeaderLineType",

      "org.apache.avro.Schema$RecordSchema",
      "org.apache.avro.Schema$Field",
      "org.apache.avro.Schema$Field$Order",
      "org.apache.avro.Schema$UnionSchema",
      "org.apache.avro.Schema$Type",
      "org.apache.avro.Schema$LockableArrayList",
      "org.apache.avro.Schema$BooleanSchema",
      "org.apache.avro.Schema$NullSchema",
      "org.apache.avro.Schema$StringSchema",
      "org.apache.avro.Schema$IntSchema",
      "org.apache.avro.Schema$FloatSchema",
      "org.apache.avro.Schema$EnumSchema",
      "org.apache.avro.Schema$Name",
      "org.apache.avro.Schema$LongSchema",
      "org.apache.avro.generic.GenericData$Array",

      "org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage",

      "org.apache.spark.sql.execution.datasources.FileFormatWriter$WriteTaskResult",
      "org.apache.spark.sql.execution.datasources.BasicWriteTaskStats",
      "org.apache.spark.sql.execution.datasources.ExecutedWriteSummary",
      "org.apache.spark.sql.execution.datasources.WriteTaskResult",
      "org.apache.spark.sql.types.BooleanType$",
      "org.apache.spark.sql.types.DoubleType$",
      "org.apache.spark.sql.types.FloatType$",
      "org.apache.spark.sql.types.IntegerType$",
      "org.apache.spark.sql.types.LongType$",
      "org.apache.spark.sql.types.StringType$",

      "scala.Tuple2$mcCC$sp",

      "scala.collection.Iterator$$anon$11",
      "scala.collection.Iterator$$anonfun$toStream$1",

      "scala.collection.convert.Wrappers$",

      "scala.collection.immutable.Stream$Cons",
      "scala.collection.immutable.Stream$Empty$",
      "scala.collection.immutable.Set$EmptySet$",

      "scala.collection.mutable.ListBuffer$$anon$1",

      "scala.reflect.ClassTag$GenericClassTag",
      "scala.reflect.ClassTag$$anon$1",
      "scala.reflect.ManifestFactory$ClassTypeManifest"
    ).flatMap { name =>
      try {
        Some[Class[_]](Class.forName(name))
      } catch {
        // Class.forName fails with ClassNotFoundException (covered by
        // NonFatal) or a LinkageError (e.g. NoClassDefFoundError,
        // ExceptionInInitializerError) on version mismatches. Swallow only
        // those expected lookup failures instead of catching Throwable,
        // so truly fatal errors (OutOfMemoryError, InterruptedException)
        // still propagate.
        case NonFatal(_)     => None
        case _: LinkageError => None
      }
    }
  }
}

0 commit comments

Comments (0)