Skip to content

Commit 6c0b177

Browse files
committed
Copy the Spark 2.X project for upgrade to Spark 3
1 parent 6e36dd7 commit 6c0b177

File tree

89 files changed

+10744
-0
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

89 files changed

+10744
-0
lines changed

spark/sql-30/build.gradle

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin

// NOTE(review): per the commit message, this project is a copy of the Spark 2.X build made in
// preparation for the Spark 3 upgrade; the description and variant/version coordinates below
// still reference Spark 2.X and are expected to change as part of that upgrade.
description = "Elasticsearch Spark (for Spark 2.X)"

apply plugin: 'java-library'
apply plugin: 'scala'
apply plugin: 'es.hadoop.build.integration'
apply plugin: 'spark.variants'

sparkVariants {
    capabilityGroup 'org.elasticsearch.spark.sql.variant'
    // Default build: Spark 2.4 on Scala 2.11; Scala 2.10 is kept as a feature variant built
    // against Spark 2.2.
    setDefaultVariant "spark20scala211", spark24Version, scala211Version
    addFeatureVariant "spark20scala210", spark22Version, scala210Version

    all { SparkVariantPlugin.SparkVariant variant ->
        String scalaCompileTaskName = project.sourceSets
                .getByName(variant.getSourceSetName("main"))
                .getCompileTaskName("scala")

        project.configurations {
            // 'embedded' holds first-party jars whose classes are unpacked into this project's
            // jar (see the jar task configuration at the bottom of this file). Non-transitive so
            // only the listed artifacts themselves are embedded.
            create(variant.configuration('embedded')) {
                transitive = false
                canBeResolved = true
            }
            // Embedded classes must also be available on the compile/runtime classpath.
            getByName(variant.configuration('implementation')) {
                extendsFrom project.configurations.getByName(variant.configuration('embedded'))
            }
        }

        // Configure main compile task
        project.getTasks().getByName(scalaCompileTaskName) { ScalaCompile compileScala ->
            configure(compileScala.scalaCompileOptions.forkOptions) {
                memoryMaximumSize = '1g'
                jvmArgs = ['-XX:MaxPermSize=512m']
            }
            compileScala.scalaCompileOptions.additionalParameters = [
                    "-feature",
                    "-unchecked",
                    "-deprecation",
                    "-Xfuture",
                    "-Yno-adapted-args",
                    "-Ywarn-dead-code",
                    "-Ywarn-numeric-widen",
                    "-Xfatal-warnings"
            ]
        }

        dependencies {
            add(variant.configuration('embedded'), project(":elasticsearch-hadoop-mr"))
            add(variant.configuration('embedded'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }

            add(variant.configuration('api'), "org.scala-lang:scala-library:$variant.scalaVersion")
            add(variant.configuration('api'), "org.scala-lang:scala-reflect:$variant.scalaVersion")
            add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'javax.servlet'
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('implementation'), "org.slf4j:slf4j-api:1.7.6") {
                because 'spark exposes slf4j components in traits that we need to extend'
            }
            add(variant.configuration('implementation'), "commons-logging:commons-logging:1.1.1")
            add(variant.configuration('implementation'), "javax.xml.bind:jaxb-api:2.3.1")
            add(variant.configuration('implementation'), "com.google.protobuf:protobuf-java:2.5.0")
            add(variant.configuration('implementation'), "org.apache.spark:spark-catalyst_${variant.scalaMajorVersion}:$variant.sparkVersion")
            add(variant.configuration('implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime
            add(variant.configuration('compileOnly'), "com.fasterxml.jackson.core:jackson-annotations:2.6.7")
            add(variant.configuration('compileOnly'), "org.json4s:json4s-jackson_${variant.scalaMajorVersion}:3.2.11")
            add(variant.configuration('compileOnly'), "org.apache.spark:spark-tags_${variant.scalaMajorVersion}:$variant.sparkVersion")

            // FIX(review): was `'2.10' == scalaMajorVersion`, which resolves the bare name
            // against the project scope instead of this variant. The genjavadoc check further
            // down correctly uses `variant.scalaMajorVersion`; made this check consistent so the
            // Scala 2.10-only dependencies are keyed off the variant being configured.
            if ('2.10' == variant.scalaMajorVersion) {
                add(variant.configuration('implementation'), "org.apache.spark:spark-unsafe_${variant.scalaMajorVersion}:$variant.sparkVersion")
                add(variant.configuration('implementation'), "org.apache.avro:avro:1.7.7")
                add(variant.configuration('implementation'), "log4j:log4j:1.2.17")
                add(variant.configuration('implementation'), "com.google.code.findbugs:jsr305:2.0.1")
                add(variant.configuration('implementation'), "org.json4s:json4s-ast_2.10:3.2.10")
                add(variant.configuration('implementation'), "com.esotericsoftware.kryo:kryo:2.21")
                add(variant.configuration('compileOnly'), "org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}")
                add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}")
                add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}")
            }

            add(variant.configuration('test', 'implementation'), project(":test:shared"))
            add(variant.configuration('test', 'implementation'), "org.elasticsearch:securemock:1.2")
            add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'javax.servlet'
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('itest', 'implementation'), project(":test:shared"))
            add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }
            add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") {
                exclude group: 'org.apache.hadoop'
            }

            add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr"))
            add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr"))

            add(variant.configuration('additionalSources'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }
            add(variant.configuration('javadocSources'), project(":elasticsearch-spark")) {
                capabilities {
                    requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version")
                }
            }
        }

        // Matches .java files (and directories, so traversal continues into them).
        def javaFilesOnly = { FileTreeElement spec ->
            spec.file.name.endsWith('.java') || spec.isDirectory()
        }

        // Add java files from scala source set to javadocSourceElements.
        project.fileTree("src/main/scala").include(javaFilesOnly).each {
            project.artifacts.add(variant.configuration('javadocSourceElements'), it)
        }

        // Javadoc from Scala sources is generated via the genjavadoc compiler plugin, which is
        // not wired up for the Scala 2.10 variant.
        if (variant.scalaMajorVersion != '2.10') {
            // Configure java source generation for javadoc purposes
            String generatedJavaDirectory = "$buildDir/generated/java/${variant.name}"
            Configuration scalaCompilerPlugin = project.configurations.maybeCreate(variant.configuration('scalaCompilerPlugin'))
            scalaCompilerPlugin.defaultDependencies { dependencies ->
                dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${variant.scalaVersion}:0.13"))
            }

            ScalaCompile compileScala = tasks.getByName(scalaCompileTaskName) as ScalaCompile
            compileScala.scalaCompileOptions.with {
                // NOTE(review): this assignment appears to REPLACE the warning/lint flags set on
                // this same task earlier in this closure rather than append to them
                // (`additionalParameters +=` would preserve them) — confirm whether dropping
                // -Xfatal-warnings etc. here is intentional.
                additionalParameters = [
                        "-Xplugin:" + configurations.getByName(variant.configuration('scalaCompilerPlugin')).asPath,
                        "-P:genjavadoc:out=$generatedJavaDirectory".toString()
                ]
            }
            // Export generated Java code from the genjavadoc compiler plugin
            artifacts {
                add(variant.configuration('javadocSourceElements'), project.file(generatedJavaDirectory)) {
                    builtBy compileScala
                }
            }
            tasks.getByName(variant.taskName('javadoc')) {
                dependsOn compileScala
                source(generatedJavaDirectory)
            }
        }

        scaladoc {
            title = "${rootProject.description} ${version} API"
        }
    }
}

// deal with the messy conflicts out there
// Ignore the scalaCompilerPlugin configurations since it is immediately resolved to configure the scala compiler tasks
configurations.matching { it.name.contains('CompilerPlugin') == false }.all { Configuration conf ->
    conf.resolutionStrategy {
        eachDependency { details ->
            // change all javax.servlet artifacts to the one used by Spark otherwise these will lead to
            // SecurityException (signer information wrong)
            if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) {
                details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016"
            }
        }
    }
    conf.exclude group: "org.mortbay.jetty"
}

// Compile for the minimum supported runtime and fork the compiler on the configured runtime JDK.
tasks.withType(ScalaCompile) { ScalaCompile task ->
    task.sourceCompatibility = project.ext.minimumRuntimeVersion
    task.targetCompatibility = project.ext.minimumRuntimeVersion
    task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath
}

// Embed the embedded dependencies in the final jar after all configuration is complete
sparkVariants {
    all { SparkVariantPlugin.SparkVariant variant ->
        tasks.getByName(variant.taskName('jar')) {
            dependsOn(project.configurations.getByName(variant.configuration('embedded')))
            // TODO: Is there a way to do this lazily? This looks like it resolves the configuration.
            from(project.configurations.getByName(variant.configuration('embedded')).collect { it.isDirectory() ? it : zipTree(it) }) {
                include "org/elasticsearch/**"
                include "esh-build.properties"
                include "META-INF/services/*"
            }
        }
    }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
5043bfebc3db072ed80fbd362e7caf00e885d8ae

0 commit comments

Comments (0)