trustyai-explainability
diff --git a/‎api/lmes/v1alpha1/lmevaljob_types.go
+30 b/‎api/lmes/v1alpha1/lmevaljob_types.go
+30
diff --git a/‎api/lmes/v1alpha1/zz_generated.deepcopy.go
+66 b/‎api/lmes/v1alpha1/zz_generated.deepcopy.go
+66
diff --git a/‎cmd/lmes_driver/main.go
+3 b/‎cmd/lmes_driver/main.go
+3
diff --git a/‎config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
+133 b/‎config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
+133
diff --git a/‎controllers/lmes/driver/driver.go
+37-3 b/‎controllers/lmes/driver/driver.go
+37-3
@@ -181,6 +181,9 @@ func (c *CustomArtifacts) GetTasks() []CustomArtifact {
 // Find details of the Unitxt Recipe here:
 // https://www.unitxt.ai/en/latest/unitxt.standard.html#unitxt.standard.StandardRecipe
 type TaskRecipe struct {
+	// The name of the TaskRecipe
+	// +optional
+	Name *string `json:"name,omitempty"`
 	// The Unitxt dataset card
 	Card Card `json:"card"`
 	// The Unitxt template
@@ -236,11 +239,35 @@ type CustomTasks struct {
 	Source CustomTaskSource `json:"source,omitempty"`
 }
 
+// AggregateMetric defines an aggregate metric using 'mean' aggregation.
+type AggregateMetric struct {
+	// The name of the metric to aggregate
+	MetricName string `json:"metricName,omitempty"`
+	// Weight by size or not. Default value is True
+	// +optional
+	WeightBySize *bool `json:"weightBySize,omitempty"`
+}
+
+// TaskGroup is a named group of tasks, given by task names and/or Unitxt task
+// recipes, together with the aggregate metrics to calculate for the group.
+type TaskGroup struct {
+	// The name of the task group
+	Name string `json:"name"`
+	// TaskNames from lm-eval's task list and/or from custom tasks if CustomTasks is defined
+	// +optional
+	TaskNames []string `json:"taskNames,omitempty"`
+	// Task Recipes specifically for the Unitxt tasks
+	// +optional
+	TaskRecipes []TaskRecipe `json:"taskRecipes,omitempty"`
+	// A list of aggregate metrics to calculate for the task group
+	// +optional
+	AggregateMetrics []AggregateMetric `json:"aggregateMetrics,omitempty"`
+}
+
 type TaskList struct {
 	// TaskNames from lm-eval's task list and/or from custom tasks if CustomTasks is defined
 	TaskNames []string `json:"taskNames,omitempty"`
 	// Task Recipes specifically for Unitxt
 	TaskRecipes []TaskRecipe `json:"taskRecipes,omitempty"`
+	TaskGroups  []TaskGroup  `json:"taskGroups,omitempty"`
 	// Custom Unitxt artifacts that can be used in a TaskRecipe
 	CustomArtifacts *CustomArtifacts `json:"custom,omitempty"`
 	// CustomTasks is a list of external tasks
@@ -340,6 +367,9 @@ func (t *TaskRecipe) String() string {
 	if t.DemosPoolSize != nil {
 		b.WriteString(fmt.Sprintf(",demos_pool_size=%d", *t.DemosPoolSize))
 	}
+	if t.Name != nil && *t.Name != "" {
+		b.WriteString(fmt.Sprintf("|%s", *t.Name))
+	}
 	return b.String()
 }
 
 
@@ -51,6 +51,7 @@ func (t *strArrayArg) String() string {
 
 var (
 	taskRecipes         strArrayArg
+	taskGroups          strArrayArg
 	customArtifactArgs  strArrayArg
 	taskNames           strArrayArg
 	copy                = flag.String("copy", "", "copy this binary to specified destination path")
@@ -70,6 +71,7 @@ var (
 
+// init registers the task-recipe, task-group, custom-artifact and task-name
+// flag variables with the flag package.
 func init() {
 	flag.Var(&taskRecipes, "task-recipe", "task recipe")
+	flag.Var(&taskGroups, "task-group", "task group")
 	flag.Var(&customArtifactArgs, "custom-artifact", "A string contains an artifact's type, name and value. Use | as separator")
 	flag.Var(&taskNames, "task-name", "A task name for custom tasks")
 }
@@ -125,6 +127,7 @@ func main() {
 		DetectDevice:        *detectDevice,
 		Logger:              driverLog,
 		TaskRecipes:         taskRecipes,
+		TaskGroups:          taskGroups,
 		CustomArtifacts:     customArtifacts,
 		Args:                args,
 		CommPort:            *commPort,
 
@@ -4819,6 +4819,136 @@ spec:
                             type: object
                         type: object
                     type: object
+                  taskGroups:
+                    items:
+                      properties:
+                        aggregateMetrics:
+                          description: A list of aggregate metrics to calculate for
+                            the task group
+                          items:
+                            description: Define an aggregate metric using 'mean' aggregation.
+                            properties:
+                              metricName:
+                                description: The name of the metric to aggregate
+                                type: string
+                              weightBySize:
+                                description: Weight by size or not. Default value
+                                  is True
+                                type: boolean
+                            type: object
+                          type: array
+                        name:
+                          description: The name of the task group
+                          type: string
+                        taskNames:
+                          description: TaskNames from lm-eval's task list and/or from
+                            custom tasks if CustomTasks is defined
+                          items:
+                            type: string
+                          type: array
+                        taskRecipes:
+                          description: Task Recipes specifically for the Unitxt tasks
+                          items:
+                            description: |-
+                              Use a task recipe to form a custom task. It maps to the Unitxt Recipe
+                              Find details of the Unitxt Recipe here:
+                              https://www.unitxt.ai/en/latest/unitxt.standard.html#unitxt.standard.StandardRecipe
+                            properties:
+                              card:
+                                description: The Unitxt dataset card
+                                properties:
+                                  custom:
+                                    description: |-
+                                      A JSON string for a custom unitxt card which contains the custom dataset.
+                                      Use the documentation here: https://www.unitxt.ai/en/latest/docs/adding_dataset.html#adding-to-the-catalog
+                                      to compose a custom card, store it as a JSON file, and use the JSON content as the value here.
+                                    type: string
+                                  name:
+                                    description: Unitxt card's ID
+                                    type: string
+                                type: object
+                              demosPoolSize:
+                                description: The pool size for the fewshot
+                                type: integer
+                              format:
+                                description: The Unitxt format
+                                type: string
+                              loaderLimit:
+                                description: A limit number of records to load
+                                type: integer
+                              metrics:
+                                description: Metrics
+                                items:
+                                  properties:
+                                    name:
+                                      description: Unitxt metric id
+                                      type: string
+                                    ref:
+                                      description: |-
+                                        The name of the custom metric in the custom field. Its value is a JSON string
+                                        for a custom Unitxt metric. Use the documentation here: https://www.unitxt.ai/en/latest/docs/adding_metric.html#adding-a-new-instance-metric
+                                        to compose a custom metric, store it as a JSON file by calling the
+                                        add_to_catalog API: https://www.unitxt.ai/en/latest/docs/saving_and_loading_from_catalog.html#adding-assets-to-the-catalog,
+                                        and use the JSON content as the value here.
+                                      type: string
+                                  type: object
+                                type: array
+                              name:
+                                description: The name of the TaskRecipe
+                                type: string
+                              numDemos:
+                                description: Number of fewshot
+                                type: integer
+                              systemPrompt:
+                                description: The Unitxt System Prompt
+                                properties:
+                                  name:
+                                    description: Unitxt System Prompt id
+                                    type: string
+                                  ref:
+                                    description: The name of the custom systemPrompt
+                                      in the custom field. Its value is a custom system
+                                      prompt string
+                                    type: string
+                                type: object
+                              task:
+                                description: The Unitxt Task
+                                properties:
+                                  name:
+                                    description: Unitxt task id
+                                    type: string
+                                  ref:
+                                    description: |-
+                                      The name of the custom task in the custom field. Its value is a JSON string
+                                      for a custom Unitxt task. Use the documentation here: https://www.unitxt.ai/en/latest/docs/adding_task.html
+                                      to compose a custom task, store it as a JSON file by calling the
+                                      add_to_catalog API: https://www.unitxt.ai/en/latest/docs/saving_and_loading_from_catalog.html#adding-assets-to-the-catalog,
+                                      and use the JSON content as the value here.
+                                    type: string
+                                type: object
+                              template:
+                                description: The Unitxt template
+                                properties:
+                                  name:
+                                    description: Unitxt template ID
+                                    type: string
+                                  ref:
+                                    description: |-
+                                      The name of the custom template in the custom field. Its value is a JSON string
+                                      for a custom Unitxt template. Use the documentation here: https://www.unitxt.ai/en/latest/docs/adding_template.html
+                                      to compose a custom template, store it as a JSON file by calling the
+                                      add_to_catalog API: https://www.unitxt.ai/en/latest/docs/saving_and_loading_from_catalog.html#adding-assets-to-the-catalog,
+                                      and use the JSON content as the value here.
+                                    type: string
+                                type: object
+                            required:
+                            - card
+                            type: object
+                          type: array
+                      required:
+                      - name
+                      type: object
+                    type: array
                   taskNames:
                     description: TaskNames from lm-eval's task list and/or from custom
                       tasks if CustomTasks is defined
@@ -4872,6 +5002,9 @@ spec:
                                 type: string
                             type: object
                           type: array
+                        name:
+                          description: The name of the TaskRecipe
+                          type: string
                         numDemos:
                           description: Number of fewshot
                           type: integer
 
@@ -60,6 +60,7 @@ type DriverOption struct {
 	DetectDevice        bool
 	TaskRecipesPath     string
 	TaskRecipes         []string
+	TaskGroups          []string
 	CatalogPath         string
 	CustomArtifacts     []CustomArtifact
 	Logger              logr.Logger
@@ -344,6 +345,9 @@ func (d *driverImpl) exec() error {
 	if err := d.createTaskRecipes(); err != nil {
 		return fmt.Errorf("failed to create task recipes: %v", err)
 	}
+	if err := d.createTaskGroups(); err != nil {
+		return fmt.Errorf("failed to create task groups: %v", err)
+	}
 
 	if err := d.prepDir4CustomArtifacts(); err != nil {
 		return fmt.Errorf("failed to create the directories for custom artifacts: %v", err)
@@ -507,12 +511,21 @@ func (d *driverImpl) updateProgress(msg string) {
 }
 
 func (d *driverImpl) createTaskRecipes() error {
-	for i, taskRecipe := range d.Option.TaskRecipes {
+	id := 0
+	for _, rString := range d.Option.TaskRecipes {
+		tokens := strings.SplitN(rString, "|", 2)
+		taskRecipe := tokens[0]
+		taskName := fmt.Sprintf("%s_%d", TaskRecipePrefix, id)
+		if len(tokens) == 2 {
+			taskName = tokens[1]
+		} else {
+			id++
+		}
 		err := os.WriteFile(
-			filepath.Join(d.Option.TaskRecipesPath, fmt.Sprintf("%s_%d.yaml", TaskRecipePrefix, i)),
+			filepath.Join(d.Option.TaskRecipesPath, fmt.Sprintf("%s.yaml", taskName)),
 			[]byte(fmt.Sprintf(
 				"task: %s\ninclude: unitxt\nrecipe: %s",
-				fmt.Sprintf("%s_%d", TaskRecipePrefix, i),
+				taskName,
 				taskRecipe,
 			)),
 			0666,
@@ -524,6 +537,27 @@ func (d *driverImpl) createTaskRecipes() error {
 	return nil
 }
 
+// createTaskGroups writes one YAML file per entry in d.Option.TaskGroups into
+// d.Option.TaskRecipesPath. Each entry must have the form "<name>|<definition>":
+// the text before the first "|" is used as both the group name and the file
+// name ("<name>.yaml"); the remainder is written verbatim after a leading
+// "group: <name>" line.
+//
+// It returns an error for an entry that has no "|" separator or an empty name,
+// and for any failure to write a file. Processing stops at the first error.
+func (d *driverImpl) createTaskGroups() error {
+	for _, rString := range d.Option.TaskGroups {
+		// SplitN with n=2 keeps any further "|" characters inside the definition.
+		tokens := strings.SplitN(rString, "|", 2)
+		if len(tokens) != 2 || tokens[0] == "" {
+			// Reject malformed entries up front: indexing tokens[1] on an entry
+			// without a separator would panic, and an empty name would produce
+			// a file called ".yaml".
+			return fmt.Errorf("invalid task group %q: expected \"<name>|<definition>\"", rString)
+		}
+		taskGroupName, definition := tokens[0], tokens[1]
+
+		file := filepath.Join(d.Option.TaskRecipesPath, fmt.Sprintf("%s.yaml", taskGroupName))
+		content := fmt.Sprintf("group: %s\n%s", taskGroupName, definition)
+		if err := os.WriteFile(file, []byte(content), 0666); err != nil {
+			return fmt.Errorf("writing task group %q: %w", taskGroupName, err)
+		}
+	}
+	return nil
+}
+
 func (d *driverImpl) prepDir4CustomArtifacts() error {
 	subDirs := []string{"cards", "templates", "system_prompts"}
 	var errs []error