Skip to content

Commit 1f037fa

Browse files
authored
Improve scalastyle rules to detect spaces (#1493)
Signed-off-by: Ahmed Hussein (amahussein) <[email protected]> Contributes to #1158 This pull request includes several changes to the `core/scalastyle-config.xml` file and updates to copyright years across multiple Scala files. Additionally, it includes minor code cleanups. ### Updates to `scalastyle-config.xml`: * Added new style checks: - `SpacesAfterPlusChecker`, `SpacesBeforePlusChecker`, `WhitespaceEndOfLineChecker` - `DisallowSpaceBeforeTokenChecker`, `SingleSpaceBetweenRParenAndLCurlyBrace`, `OmitBracesInCase` - `NewLineAtEofChecker`, `SpaceAfterCommentStartChecker`, `EnsureSingleSpaceBeforeTokenChecker`, `EnsureSingleSpaceAfterTokenChecker` - `TokenChecker` for `println` statements - Disabled checks: `ParameterNumberChecker`, `PublicMethodsHaveTypeChecker`, `NonASCIICharacterChecker`
1 parent bb048d0 commit 1f037fa

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+258
-176
lines changed

core/scalastyle-config.xml

+58-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
Copyright (c) 2023-2024, NVIDIA CORPORATION. All Rights Reserved.
2+
Copyright (c) 2023-2025, NVIDIA CORPORATION. All Rights Reserved.
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -40,6 +40,12 @@ You can also disable only one rule, by specifying its rule id, as specified in:
4040

4141
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
4242

43+
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
44+
45+
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
46+
47+
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
48+
4349
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
4450
<parameters>
4551
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
@@ -58,6 +64,24 @@ You can also disable only one rule, by specifying its rule id, as specified in:
5864
</parameters>
5965
</check>
6066

67+
<check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
68+
<parameters>
69+
<parameter name="tokens">COMMA</parameter>
70+
</parameters>
71+
</check>
72+
73+
<check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
74+
<parameters><parameter name="regex">\)\{</parameter></parameters>
75+
<customMessage><![CDATA[
76+
Single Space between ')' and `{`.
77+
]]></customMessage>
78+
</check>
79+
80+
<check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
81+
<parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
82+
<customMessage>Omit braces in case clauses.</customMessage>
83+
</check>
84+
6185
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
6286
<parameters>
6387
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
@@ -92,10 +116,35 @@ You can also disable only one rule, by specifying its rule id, as specified in:
92116
</parameters>
93117
</check>
94118

119+
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
120+
121+
<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
122+
123+
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
124+
<parameters>
125+
<parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
126+
</parameters>
127+
</check>
128+
129+
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
130+
<parameters>
131+
<parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
132+
</parameters>
133+
</check>
134+
95135
<!-- ??? usually shouldn't be checked into the code base. -->
96136
<check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage"
97137
enabled="true"/>
98138

139+
<!-- Similar to Spark, all printlns need to be wrapped in '// scalastyle:off/on println' -->
140+
<check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
141+
<parameters><parameter name="regex">^println$</parameter></parameters>
142+
<customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
143+
// scalastyle:off println
144+
println(...)
145+
// scalastyle:on println]]></customMessage>
146+
</check>
147+
99148
<check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker"
100149
enabled="true">
101150
<parameters>
@@ -119,4 +168,12 @@ You can also disable only one rule, by specifying its rule id, as specified in:
119168
<!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
120169
<check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>
121170

171+
<check customId="argcount" level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="false">
172+
<parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
173+
</check>
174+
175+
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"></check>
176+
177+
<!-- Unit test uses ascii characters. So, we need to clean that up first -->
178+
<check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="false"></check>
122179
</scalastyle>

core/src/main/scala/com/nvidia/spark/rapids/tool/AppSummaryInfoBaseProvider.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ class AppSummaryInfoBaseProvider extends AppInfoPropertyGetter
4040
override def getProperty(propKey: String): Option[String] = {
4141
if (propKey.startsWith(ToolUtils.PROPS_RAPIDS_KEY_PREFIX)) {
4242
getRapidsProperty(propKey)
43-
} else if (propKey.startsWith("spark")){
43+
} else if (propKey.startsWith("spark")) {
4444
getSparkProperty(propKey)
4545
} else {
4646
getSystemProperty(propKey)

core/src/main/scala/com/nvidia/spark/rapids/tool/Platform.scala

+8-8
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ abstract class Platform(var gpuDevice: Option[GpuDevice],
432432
/**
433433
* Attempts to get the instance type based on the core and gpu requirements.
434434
*/
435-
def getInstanceByResources(cores:Int, numGpus: Int): Option[InstanceInfo] = None
435+
def getInstanceByResources(cores: Int, numGpus: Int): Option[InstanceInfo] = None
436436

437437
/**
438438
* Recommend a GPU Instance type to use for this application.
@@ -452,7 +452,7 @@ abstract class Platform(var gpuDevice: Option[GpuDevice],
452452
// we could put on a node.
453453
if (origClusterNumExecsPerNode == -1) {
454454
maxGpusSupported
455-
} else {
455+
} else {
456456
origClusterNumExecsPerNode
457457
}
458458
} else {
@@ -611,7 +611,7 @@ class DatabricksAwsPlatform(gpuDevice: Option[GpuDevice],
611611
clusterProperties: Option[ClusterProperties])
612612
extends DatabricksPlatform(gpuDevice, clusterProperties)
613613
with Logging {
614-
override val platformName: String = PlatformNames.DATABRICKS_AWS
614+
override val platformName: String = PlatformNames.DATABRICKS_AWS
615615
override val defaultGpuDevice: GpuDevice = A10GGpu
616616

617617
override def getInstanceByResources(
@@ -665,7 +665,7 @@ class DatabricksAzurePlatform(gpuDevice: Option[GpuDevice],
665665

666666
class DataprocPlatform(gpuDevice: Option[GpuDevice],
667667
clusterProperties: Option[ClusterProperties]) extends Platform(gpuDevice, clusterProperties) {
668-
override val platformName: String = PlatformNames.DATAPROC
668+
override val platformName: String = PlatformNames.DATAPROC
669669
override val defaultGpuDevice: GpuDevice = T4Gpu
670670
override def isPlatformCSP: Boolean = true
671671
override def maxGpusSupported: Int = 4
@@ -694,21 +694,21 @@ class DataprocPlatform(gpuDevice: Option[GpuDevice],
694694
class DataprocServerlessPlatform(gpuDevice: Option[GpuDevice],
695695
clusterProperties: Option[ClusterProperties])
696696
extends DataprocPlatform(gpuDevice, clusterProperties) {
697-
override val platformName: String = PlatformNames.DATAPROC_SL
697+
override val platformName: String = PlatformNames.DATAPROC_SL
698698
override val defaultGpuDevice: GpuDevice = L4Gpu
699699
override def isPlatformCSP: Boolean = true
700700
}
701701

702702
class DataprocGkePlatform(gpuDevice: Option[GpuDevice],
703703
clusterProperties: Option[ClusterProperties])
704704
extends DataprocPlatform(gpuDevice, clusterProperties) {
705-
override val platformName: String = PlatformNames.DATAPROC_GKE
705+
override val platformName: String = PlatformNames.DATAPROC_GKE
706706
override def isPlatformCSP: Boolean = true
707707
}
708708

709709
class EmrPlatform(gpuDevice: Option[GpuDevice],
710710
clusterProperties: Option[ClusterProperties]) extends Platform(gpuDevice, clusterProperties) {
711-
override val platformName: String = PlatformNames.EMR
711+
override val platformName: String = PlatformNames.EMR
712712
override val defaultGpuDevice: GpuDevice = A10GGpu
713713

714714
override def isPlatformCSP: Boolean = true
@@ -755,7 +755,7 @@ class EmrPlatform(gpuDevice: Option[GpuDevice],
755755

756756
class OnPremPlatform(gpuDevice: Option[GpuDevice],
757757
clusterProperties: Option[ClusterProperties]) extends Platform(gpuDevice, clusterProperties) {
758-
override val platformName: String = PlatformNames.ONPREM
758+
override val platformName: String = PlatformNames.ONPREM
759759
// Note we don't have an speedup factor file for onprem l4's but we want auto tuner
760760
// to use L4.
761761
override val defaultGpuDevice: GpuDevice = L4Gpu

core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/DeltaLakeHelper.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ class DLWriteWithFormatAndSchemaParser(node: SparkPlanGraphNode,
5555
object DeltaLakeHelper {
5656
// we look for the serdeLibrary which is part of the node description:
5757
// Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
58-
//private val serdeRegex = "Serde Library: ([\\w.]+)".r
58+
// private val serdeRegex = "Serde Library: ([\\w.]+)".r
5959
private val serdeRegex = "Serde Library: ([\\w.]+)".r
6060
// We look for the schema in the node description. It is in the following format
6161
// Schema: root

core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/ReadParser.scala

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -86,7 +86,7 @@ object ReadParser extends Logging {
8686
}
8787

8888
// This tries to get just the field specified by tag in a string that
89-
// may contain multiple fields. It looks for a comma to delimit fields.
89+
// may contain multiple fields. It looks for a comma to delimit fields.
9090
private def getFieldWithoutTag(str: String, tag: String): String = {
9191
val index = str.indexOf(tag)
9292
// remove the tag from the final string returned
@@ -187,7 +187,6 @@ object ReadParser extends Logging {
187187
}
188188
ReadMetaData(schema, location, fileFormat, tags = extractReadTags(node.desc))
189189
}
190-
191190
}
192191

193192
// For the read score we look at the read format and datatypes for each

core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -67,7 +67,7 @@ object UnsupportedReasons extends Enumeration {
6767
case IS_UNSUPPORTED => "Unsupported"
6868
case CONTAINS_UNSUPPORTED_EXPR => "Contains unsupported expr"
6969
case UNSUPPORTED_IO_FORMAT => "Unsupported IO format"
70-
case customReason @ _ => customReason.toString
70+
case customReason @ _ => customReason.toString
7171
}
7272
}
7373
}
@@ -163,7 +163,7 @@ case class ExecInfo(
163163
OpActions.IgnoreNoPerf
164164
} else if (shouldIgnore) {
165165
OpActions.IgnorePerf
166-
} else {
166+
} else {
167167
OpActions.Triage
168168
}
169169
}
@@ -296,7 +296,7 @@ object ExecInfo {
296296
children: Option[Seq[ExecInfo]], // only one level deep
297297
stages: Set[Int] = Set.empty,
298298
shouldRemove: Boolean = false,
299-
unsupportedExecReason:String = "",
299+
unsupportedExecReason: String = "",
300300
unsupportedExprs: Seq[UnsupportedExprOpRef] = Seq.empty,
301301
dataSet: Boolean = false,
302302
udf: Boolean = false,
@@ -660,7 +660,7 @@ object SQLPlanParser extends Logging {
660660
maxDuration
661661
}
662662

663-
private def ignoreExpression(expr:String): Boolean = {
663+
private def ignoreExpression(expr: String): Boolean = {
664664
ignoreExpressions.contains(expr.toLowerCase)
665665
}
666666

@@ -789,7 +789,7 @@ object SQLPlanParser extends Logging {
789789
parsedExpressions.toArray
790790
}
791791

792-
def parseWindowExpressions(exprStr:String): Array[String] = {
792+
def parseWindowExpressions(exprStr: String): Array[String] = {
793793
val parsedExpressions = ArrayBuffer[String]()
794794
// [sum(cast(level#30 as bigint)) windowspecdefinition(device#29, id#28 ASC NULLS FIRST,
795795
// specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS sum#35L,

core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/DriverLogProcessor.scala

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -64,7 +64,6 @@ class DriverLogProcessor(logPath: String)
6464
}
6565
}
6666

67-
6867
object BaseDriverLogInfoProvider {
6968
def noneDriverLog: BaseDriverLogInfoProvider = new BaseDriverLogInfoProvider()
70-
}
69+
}

core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/GenerateTimeline.scala

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ class TimelineTaskInfo(val stageId: Int, val taskId: Long,
3535

3636
class TimelineStageInfo(val stageId: Int,
3737
startTime: Long,
38-
endTime:Long,
38+
endTime: Long,
3939
val duration: Long) extends TimelineTiming(startTime, endTime)
4040

4141
class TimelineJobInfo(val jobId: Int,
@@ -279,7 +279,7 @@ object GenerateTimeline {
279279
.flatMap(_.taskUpdatesMap.values).sum
280280

281281
val semMetricsMs = app.accumManager.accumInfoMap.flatMap {
282-
case (_,accumInfo: AccumInfo)
282+
case (_, accumInfo: AccumInfo)
283283
if accumInfo.infoRef.name == AccumNameRef.NAMES_TABLE.get("gpuSemaphoreWait") =>
284284
Some(accumInfo.taskUpdatesMap.values.sum)
285285
case _ => None

core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/HealthCheck.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ class HealthCheck(apps: Seq[ApplicationInfo]) {
4747
ProfRemovedExecutorView.getRawView(apps)
4848
}
4949

50-
//Function to list all *possible* not-supported plan nodes if GPU Mode=on
50+
// Function to list all *possible* not-supported plan nodes if GPU Mode=on
5151
def getPossibleUnsupportedSQLPlan: Seq[UnsupportedOpsProfileResult] = {
5252
val res = apps.flatMap { app =>
5353
app.planMetricProcessor.unsupportedSQLPlan.map { unsup =>

core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileClassWarehouse.scala

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -203,7 +203,7 @@ class SQLExecutionInfoClass(
203203
}
204204

205205
case class SQLAccumProfileResults(appIndex: Int, sqlID: Long, nodeID: Long,
206-
nodeName: String, accumulatorId: Long, name: String, min: Long, median:Long,
206+
nodeName: String, accumulatorId: Long, name: String, min: Long, median: Long,
207207
max: Long, total: Long, metricType: String, stageIds: String) extends ProfileResult {
208208
override val outputHeaders = Seq("appIndex", "sqlID", "nodeID", "nodeName", "accumulatorId",
209209
"name", "min", "median", "max", "total", "metricType", "stageIds")
@@ -310,7 +310,7 @@ case class AppInfoProfileResults(appIndex: Int, appName: String,
310310

311311
override def convertToSeq: Seq[String] = {
312312
Seq(appIndex.toString, appName, appId.getOrElse(""),
313-
sparkUser, startTime.toString, endTimeToStr, durToStr,
313+
sparkUser, startTime.toString, endTimeToStr, durToStr,
314314
durationStr, sparkRuntime.toString, sparkVersion, pluginEnabled.toString)
315315
}
316316
override def convertToCSVSeq: Seq[String] = {
@@ -1105,7 +1105,7 @@ case class WholeStageCodeGenResults(
11051105
}
11061106
}
11071107

1108-
case class RecommendedPropertyResult(property: String, value: String){
1108+
case class RecommendedPropertyResult(property: String, value: String) {
11091109
override def toString: String = "--conf %s=%s".format(property, value)
11101110
}
11111111

0 commit comments

Comments
 (0)