Commit b446b5d

Add support for Spark 3.0 (#1592) (#1599)
Spark 3.0 support is added via a new source directory and subproject. Scala 2.12 is the only supported Scala version.
1 parent 7605515 commit b446b5d
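For consumers, the practical effect is a new artifact. A minimal sketch of depending on it from a Gradle build, assuming the published coordinates follow the existing elasticsearch-spark-20_2.11 convention (project name plus Scala major version); the version number here is illustrative only:

def eshVersion = "7.12.0"  // assumption: any es-hadoop release that includes this commit
dependencies {
    implementation "org.elasticsearch:elasticsearch-spark-30_2.12:${eshVersion}"
    implementation "org.apache.spark:spark-sql_2.12:3.0.1"  // matches spark30Version added below
}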

92 files changed: +10736 −0 lines


gradle.properties

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ spark13Version = 1.6.2
 spark20Version = 2.3.0
 spark22Version = 2.2.3
 spark24Version = 2.4.4
+spark30Version = 3.0.1
 
 # same as Spark's
 scala210Version = 2.10.7

settings.gradle

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,10 @@ include 'sql-20'
 project(":sql-20").projectDir = new File(settingsDir, "spark/sql-20")
 project(":sql-20").name = "elasticsearch-spark-20"
 
+include 'sql-30'
+project(":sql-30").projectDir = new File(settingsDir, "spark/sql-30")
+project(":sql-30").name = "elasticsearch-spark-30"
+
 include 'storm'
 project(":storm").name = "elasticsearch-storm"

spark/core/build.gradle

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ apply plugin: 'spark.variants'
 sparkVariants {
     capabilityGroup 'org.elasticsearch.spark.variant'
     setCoreDefaultVariant "spark20scala212", spark24Version, scala212Version
+    addCoreFeatureVariant "spark30scala212", spark30Version, scala212Version
     addCoreFeatureVariant "spark20scala211", spark24Version, scala211Version
     addCoreFeatureVariant "spark20scala210", spark22Version, scala210Version
     addCoreFeatureVariant "spark13scala211", spark13Version, scala211Version

spark/sql-30/build.gradle

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin

description = "Elasticsearch Spark (for Spark 3.X)"

apply plugin: 'java-library'
apply plugin: 'scala'
apply plugin: 'es.hadoop.build.integration'
apply plugin: 'spark.variants'

sparkVariants {
    capabilityGroup 'org.elasticsearch.spark.sql.variant'
    setDefaultVariant "spark30scala212", spark30Version, scala212Version

    all { SparkVariantPlugin.SparkVariant variant ->
        String scalaCompileTaskName = project.sourceSets
                .getByName(variant.getSourceSetName("main"))
                .getCompileTaskName("scala")

        project.configurations {
            create(variant.configuration('embedded')) {
                transitive = false
                canBeResolved = true
            }
            getByName(variant.configuration('implementation')) {
                extendsFrom project.configurations.getByName(variant.configuration('embedded'))
            }
        }

        // Configure main compile task
        project.getTasks().getByName(scalaCompileTaskName) { ScalaCompile compileScala ->
            configure(compileScala.scalaCompileOptions.forkOptions) {
                memoryMaximumSize = '1g'
                jvmArgs = ['-XX:MaxPermSize=512m']
            }
            compileScala.scalaCompileOptions.additionalParameters = [
                    "-feature",
                    "-unchecked",
                    "-deprecation",
                    "-Xfuture",
                    "-Yno-adapted-args",
                    "-Ywarn-dead-code",
                    "-Ywarn-numeric-widen",
                    "-Xfatal-warnings"
            ]
        }

        dependencies {
            add(variant.configuration('embedded'), project(":elasticsearch-hadoop-mr"))
            add(variant.configuration('embedded'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }

            add(variant.configuration('api'), "org.scala-lang:scala-library:$variant.scalaVersion")
            add(variant.configuration('api'), "org.scala-lang:scala-reflect:$variant.scalaVersion")
            add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'javax.servlet'
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('implementation'), "org.slf4j:slf4j-api:1.7.6") {
                because 'spark exposes slf4j components in traits that we need to extend'
            }
            add(variant.configuration('implementation'), "commons-logging:commons-logging:1.1.1")
            add(variant.configuration('implementation'), "javax.xml.bind:jaxb-api:2.3.1")
            add(variant.configuration('implementation'), "com.google.protobuf:protobuf-java:2.5.0")
            add(variant.configuration('implementation'), "org.apache.spark:spark-catalyst_${variant.scalaMajorVersion}:$variant.sparkVersion")
            add(variant.configuration('implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime
            add(variant.configuration('compileOnly'), "com.fasterxml.jackson.core:jackson-annotations:2.6.7")
            add(variant.configuration('compileOnly'), "org.json4s:json4s-jackson_${variant.scalaMajorVersion}:3.6.6")
            add(variant.configuration('compileOnly'), "org.json4s:json4s-ast_${variant.scalaMajorVersion}:3.6.6")
            add(variant.configuration('compileOnly'), "org.apache.spark:spark-tags_${variant.scalaMajorVersion}:$variant.sparkVersion")

            add(variant.configuration('test', 'implementation'), project(":test:shared"))
            add(variant.configuration('test', 'implementation'), "org.elasticsearch:securemock:1.2")
            add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'javax.servlet'
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('itest', 'implementation'), project(":test:shared"))
            add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr"))
            add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr"))

            add(variant.configuration('additionalSources'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }
            add(variant.configuration('javadocSources'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }
        }

        def javaFilesOnly = { FileTreeElement spec ->
            spec.file.name.endsWith('.java') || spec.isDirectory()
        }

        // Add java files from scala source set to javadocSourceElements.
        project.fileTree("src/main/scala").include(javaFilesOnly).each {
            project.artifacts.add(variant.configuration('javadocSourceElements'), it)
        }

        // Configure java source generation for javadoc purposes
        String generatedJavaDirectory = "$buildDir/generated/java/${variant.name}"
        Configuration scalaCompilerPlugin = project.configurations.maybeCreate(variant.configuration('scalaCompilerPlugin'))
        scalaCompilerPlugin.defaultDependencies { dependencies ->
            dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${variant.scalaVersion}:0.13"))
        }

        ScalaCompile compileScala = tasks.getByName(scalaCompileTaskName) as ScalaCompile
        compileScala.scalaCompileOptions.with {
            additionalParameters = [
                    "-Xplugin:" + configurations.getByName(variant.configuration('scalaCompilerPlugin')).asPath,
                    "-P:genjavadoc:out=$generatedJavaDirectory".toString()
            ]
        }
        // Export generated Java code from the genjavadoc compiler plugin
        artifacts {
            add(variant.configuration('javadocSourceElements'), project.file(generatedJavaDirectory)) {
                builtBy compileScala
            }
        }
        tasks.getByName(variant.taskName('javadoc')) {
            dependsOn compileScala
            source(generatedJavaDirectory)
        }

        scaladoc {
            title = "${rootProject.description} ${version} API"
        }
    }
}

// deal with the messy conflicts out there
// Ignore the scalaCompilerPlugin configurations since it is immediately resolved to configure the scala compiler tasks
configurations.matching { it.name.contains('CompilerPlugin') == false }.all { Configuration conf ->
    conf.resolutionStrategy {
        eachDependency { details ->
            // change all javax.servlet artifacts to the one used by Spark otherwise these will lead to
            // SecurityException (signer information wrong)
            if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) {
                details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016"
            }
        }
    }
    conf.exclude group: "org.mortbay.jetty"
}

tasks.withType(ScalaCompile) { ScalaCompile task ->
    task.sourceCompatibility = project.ext.minimumRuntimeVersion
    task.targetCompatibility = project.ext.minimumRuntimeVersion
    task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath
}

// Embed the embedded dependencies in the final jar after all configuration is complete
sparkVariants {
    all { SparkVariantPlugin.SparkVariant variant ->
        tasks.getByName(variant.taskName('jar')) {
            dependsOn(project.configurations.getByName(variant.configuration('embedded')))
            // TODO: Is there a way to do this lazily? This looks like it resolves the configuration.
            from(project.configurations.getByName(variant.configuration('embedded')).collect { it.isDirectory() ? it : zipTree(it) }) {
                include "org/elasticsearch/**"
                include "esh-build.properties"
                include "META-INF/services/*"
            }
        }
    }
}
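For reference, here is the 'embedded' pattern used above, reduced to its essentials outside the variant plugin: a non-transitive, resolvable configuration is wired into implementation, and the jar task unpacks whatever it resolves, so internal modules ship inside the artifact rather than as declared dependencies. A sketch with a hypothetical module name:

configurations {
    embedded {
        transitive = false
        canBeResolved = true
    }
    implementation.extendsFrom(embedded)
}

dependencies {
    embedded project(":some-internal-module")  // hypothetical; stands in for :elasticsearch-hadoop-mr above
}

jar {
    dependsOn(configurations.embedded)
    // unpack each resolved jar and copy only selected paths into this jar
    from(configurations.embedded.collect { it.isDirectory() ? it : zipTree(it) }) {
        include "org/elasticsearch/**"
    }
}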
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+5043bfebc3db072ed80fbd362e7caf00e885d8ae
