Skip to content

Remove the calculation of job frequency from core-tools #1630

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
import scala.collection.JavaConverters._

import com.nvidia.spark.rapids.tool.{EventLogInfo, FailedEventLog, PlatformFactory, ToolBase}
import com.nvidia.spark.rapids.tool.qualification.QualOutputWriter.DEFAULT_JOB_FREQUENCY
import com.nvidia.spark.rapids.tool.tuning.{QualificationAutoTunerConfigsProvider, TunerContext}
import com.nvidia.spark.rapids.tool.views.QualRawReportGenerator
import org.apache.hadoop.conf.Configuration
Expand Down Expand Up @@ -72,7 +71,7 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
threadPool.shutdownNow()
}
progressBar.foreach(_.finishAll())
val allAppsSum = estimateAppFrequency(allApps.asScala.values.toSeq)
val allAppsSum = allApps.asScala.values.toSeq
// sort order and limit only applies to the report summary text file,
// the csv file we write the entire data in descending order
val sortedDescDetailed = sortDescForDetailedReport(allAppsSum)
Expand All @@ -92,41 +91,11 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
// The sums elements is ordered in descending order. so, only we need to reverse it if the order
// is ascending
private def sortForExecutiveSummary(appsSumDesc: Seq[QualificationSummaryInfo],
order: String): Seq[EstimatedSummaryInfo] = {
order: String): Seq[EstimatedAppInfo] = {
if (QualificationArgs.isOrderAsc(order)) {
appsSumDesc.reverse.map(sum =>
EstimatedSummaryInfo(
sum.estimatedInfo, sum.estimatedFrequency.getOrElse(DEFAULT_JOB_FREQUENCY)))
appsSumDesc.reverse.map(_.estimatedInfo)
} else {
appsSumDesc.map(sum => EstimatedSummaryInfo(
sum.estimatedInfo, sum.estimatedFrequency.getOrElse(DEFAULT_JOB_FREQUENCY)))
}
}

// Estimate app frequency based off of all applications in this run, unit jobs per month
private def estimateAppFrequency(
appsSum: Seq[QualificationSummaryInfo]): Seq[QualificationSummaryInfo] = {
val appFrequency = scala.collection.mutable.Map[String, Double]()
var windowStart: Long = Long.MaxValue
var windowEnd: Long = Long.MinValue

appsSum.foreach { sum =>
appFrequency += (sum.appName -> (1.0 + appFrequency.getOrElse(sum.appName, 0.0)))
windowStart = Math.min(sum.startTime, windowStart)
windowEnd = Math.max(windowEnd, sum.startTime + sum.estimatedInfo.appDur)
}
val windowInMonths =
if (windowEnd > windowStart) ((windowEnd - windowStart) / (1000.0*60*60*24*30)) else 1.0
// Scale frequency to per month assuming uniform distribution over the logging window rather
// than the individual applications window. Single run jobs are given a default frequency
val monthlyFrequency = appFrequency.map { case (appName, numApps) => (appName ->
(if (numApps <= 1) DEFAULT_JOB_FREQUENCY else (numApps / windowInMonths).round))
}
appsSum.map { app =>
val frequency = monthlyFrequency.getOrElse(app.appName, DEFAULT_JOB_FREQUENCY)
// Ensure jobs have a valid frequency, rounding up to 1 (monthly)
app.copy(estimatedFrequency =
Option(if (frequency <= 0) 1 else frequency))
appsSumDesc.map(_.estimatedInfo)
}
}

Expand Down Expand Up @@ -244,8 +213,7 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
val qWriter = new QualOutputWriter(outputDir, reportReadSchema, printStdout,
order)

qWriter.writeTextReport(allAppsSum,
sortForExecutiveSummary(sortedDescDetailed, order), numRows)
qWriter.writeTextReport(allAppsSum, sortForExecutiveSummary(sortedDescDetailed, order), numRows)
qWriter.writeDetailedCSVReport(sortedDescDetailed)
if (reportSqlLevel) {
qWriter.writePerSqlCSVReport(allAppsSum, maxSQLDescLength)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ class RunningQualificationApp(
Seq(info).map(_.unSupportedExprs.size),
QualOutputWriter.UNSUPPORTED_EXPRS_MAX_SIZE,
QualOutputWriter.UNSUPPORTED_EXPRS.size)
val estimatedFrequencyMaxSize = QualOutputWriter.ESTIMATED_FREQUENCY_MAX_SIZE
val hasClusterTags = info.clusterTags.nonEmpty
val (clusterIdMax, jobIdMax, runNameMax) = if (hasClusterTags) {
(QualOutputWriter.getMaxSizeForHeader(Seq(info).map(
Expand All @@ -229,18 +228,25 @@ class RunningQualificationApp(
appIdMaxSize = info.appId.length,
unSupExecMaxSize = unSupExecMaxSize,
unSupExprMaxSize = unSupExprMaxSize,
estimatedFrequencyMaxSize = estimatedFrequencyMaxSize,
hasClusterTags = hasClusterTags,
clusterIdMaxSize = clusterIdMax,
jobIdMaxSize = jobIdMax,
runNameMaxSize = runNameMax)
val headerStr = QualOutputWriter.constructOutputRowFromMap(appHeadersAndSizes,
delimiter, prettyPrint)
val appInfoStr = QualOutputWriter.constructAppSummaryInfo(
EstimatedSummaryInfo(info.estimatedInfo),
appHeadersAndSizes, appId.length, unSupExecMaxSize, unSupExprMaxSize,
estimatedFrequencyMaxSize, hasClusterTags, clusterIdMax, jobIdMax, runNameMax,
delimiter, prettyPrint)
val appInfoStr =
QualOutputWriter.constructAppSummaryInfo(
sumInfo = info.estimatedInfo,
headersAndSizes = appHeadersAndSizes,
appIdMaxSize = appId.length,
unSupExecMaxSize = unSupExecMaxSize,
unSupExprMaxSize = unSupExprMaxSize,
hasClusterTags = hasClusterTags,
clusterIdMaxSize = clusterIdMax,
jobIdMaxSize = jobIdMax,
runNameMaxSize = runNameMax,
delimiter = delimiter,
prettyPrint = prettyPrint)
headerStr + appInfoStr
case None =>
logWarning(s"Unable to get qualification information for this application")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
import com.nvidia.spark.rapids.tool.{EventLogInfo, Platform}
import com.nvidia.spark.rapids.tool.planparser.{ExecInfo, PlanInfo, SQLPlanParser}
import com.nvidia.spark.rapids.tool.qualification._
import com.nvidia.spark.rapids.tool.qualification.QualOutputWriter.DEFAULT_JOB_FREQUENCY
import org.apache.hadoop.conf.Configuration

import org.apache.spark.internal.Logging
Expand Down Expand Up @@ -851,11 +850,6 @@ case class EstimatedAppInfo(
unsupportedExprs: String,
allTagsMap: Map[String, String])

// Used by writers, estimated app summary with estimated frequency
case class EstimatedSummaryInfo(
estimatedInfo: EstimatedAppInfo,
estimatedFrequency: Long = DEFAULT_JOB_FREQUENCY)

// Estimate based on wall clock times for each SQL query
case class EstimatedPerSQLSummaryInfo(
sqlID: Long,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly), Total Core Seconds
"Spark shell","local-1626104300434",1,1500,1469,131104,996,89.7,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,1388,129598,493,976,false,"CollectLimit","",30,1564
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","local-1626104300434",1,1500,1469,131104,996,89.7,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,1388,129598,493,976,false,"CollectLimit","",1564
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",1599
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","local-1623876083964",1,119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",1599
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"Spark shell","app-20211019113801-0001",1,2942,19894,571967,2814,29.76,"","JDBC[*]","","","","",1812,2883,569025,859,19035,false,"CollectLimit;Scan jdbc;Execute CreateViewCommand","",30,9110
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","app-20211019113801-0001",1,2942,19894,571967,2814,29.76,"","JDBC[*]","","","","",1812,2883,569025,859,19035,false,"CollectLimit;Scan jdbc;Execute CreateViewCommand","",9110
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"Spark shell","local-1629446106683",1,1910,6475,17698,1910,27.8,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",221851,132
"Spark shell","local-1623263471760",1,0,0,22937,0,0.0,"","","","","","",0,0,22937,0,0,false,"","",221851,266
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","local-1629446106683",1,1910,6475,17698,1910,27.8,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",132
"Spark shell","local-1623263471760",1,0,0,22937,0,0.0,"","","","","","",0,0,22937,0,0,false,"","",266
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"TPC-DS Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,0.0,"24","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",30,24270
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"TPC-DS Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,0.0,"24","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",24270
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"TPC-DS Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,35.39,"","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",30,24270
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"TPC-DS Like Bench q86","app-20210319163812-1778",1,9910,4320658,26171,9910,35.39,"","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",24270
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
"Spark shell","local-1630045673160",1,1363,3757,21200,1363,34.65,"","","","array<struct<city:string,state:string>>;map<string,map<st","array<struct<city:string,state:string>>;map<string,map<st","NESTED COMPLEX TYPE",1294,716,20239,0,3757,false,"","",30,160
App Name,App ID,Attempt ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Total Core Seconds
"Spark shell","local-1630045673160",1,1363,3757,21200,1363,34.65,"","","","array<struct<city:string,state:string>>;map<string,map<st","array<struct<city:string,state:string>>;map<string,map<st","NESTED COMPLEX TYPE",1294,716,20239,0,3757,false,"","",160
Loading