diff --git a/docs/simulation_and_benchmarking/rai_bench.md b/docs/simulation_and_benchmarking/rai_bench.md
index d07905ac5..be31f4a05 100644
--- a/docs/simulation_and_benchmarking/rai_bench.md
+++ b/docs/simulation_and_benchmarking/rai_bench.md
@@ -109,7 +109,7 @@ The `Validator` class can combine single or multiple subtasks to create a single

 ### Task

-A Task represents a specific prompt and set of tools available. A list of validators is assigned to validate the performance.
+A Task represents specific prompts and a set of available tools. A list of validators is assigned to validate the performance.

 ??? info "Task class definition"

@@ -117,20 +117,52 @@ A Task represents a specific prompt and set of tools available. A list of valida

 As you can see, the framework is very flexible. Any SubTask can be combined into any Validator that can be later assigned to any Task.

+Every Task needs to define its prompt and system prompt, what tools the agent will have available, how many tool calls are required to complete it, and how many optional tool calls are possible.
+
+Optional tool calls are calls that are not obligatory to pass the Task but shouldn't be considered errors. For example, `GetROS2RGBCameraTask`, which has the prompt `Get RGB camera image.`, requires one tool call with the `get_ros2_image` tool. But listing topics before doing so is a valid approach, so in this case the number of optional tool calls is `1`.
+
 ### ToolCallingAgentBenchmark

 The ToolCallingAgentBenchmark class manages the execution of tasks and collects results.

 ### Available Tasks

-Tasks of this benchmark are grouped by type:
+There are predefined Tasks available, grouped by category:

-- Basic - basic usage of tools
+- Basic - requires retrieving info from certain topics
 - Navigation
 - Spatial reasoning - questions about surroundings with images attached
 - Manipulation
 - Custom Interfaces - requires using messages with custom interfaces

-If you want to know details about every task, visit `rai_bench/tool_calling_agent/tasks`
+Every Task has an assigned `complexity`, which reflects its difficulty.
+
+When creating a Task, you can define a few parameters:
+
+```python
+class TaskArgs(BaseModel):
+    """Holds the configurations specified by user"""
+
+    extra_tool_calls: int = 0
+    prompt_detail: Literal["brief", "descriptive"] = "brief"
+    examples_in_system_prompt: Literal[0, 2, 5] = 0
+```

-## Test Models
+- examples_in_system_prompt - How many examples are included in the system prompt, for example:
+
+    - `0`: `You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. Be proactive and use the tools to answer questions.`
+    - `2`: `You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. Be proactive and use the tools to answer questions. Example of tool calls: get_ros2_message_interface, args: {'msg_type': 'geometry_msgs/msg/Twist'} publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}}`
+
+- prompt_detail - How descriptive the Task prompt should be, for example:
+
+    - `brief`: "Get all camera images"
+    - `descriptive`: "Get all camera images from all available camera sources in the system.
+      This includes both RGB color images and depth images.
+      You can discover what camera topics are available and capture images from each."
+
+    Descriptive prompts provide guidance and tips.
+
+- extra_tool_calls - How many extra tool calls an agent can make and still pass the Task, for example:
+
+    - `GetROS2RGBCameraTask` has 1 required tool call and 1 optional. With `extra_tool_calls` set to 5, the agent can correct itself a couple of times and still pass, even with 7 tool calls. There are 2 types of invalid tool calls: first, when a tool is used incorrectly and the agent receives an error - this makes it easier for the agent to correct itself; second, when a tool is called properly but it is not the tool that should be called, or it is called with the wrong params. In this case the agent won't get any error, so it will be harder to correct, but BOTH of these cases are counted toward `extra_tool_calls`.
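+
+For instance, a minimal sketch of two configurations of the same Task, one lenient and one strict (the import path matches the one used in the custom-task tutorial):
+
+```python
+from rai_bench.tool_calling_agent.interfaces import TaskArgs
+
+# lenient run: descriptive prompt, 5 examples in the system prompt,
+# and up to 5 extra tool calls before the task fails
+lenient = TaskArgs(
+    extra_tool_calls=5,
+    prompt_detail="descriptive",
+    examples_in_system_prompt=5,
+)
+
+# strict run: the defaults - brief prompt, no examples, no extra tool calls
+strict = TaskArgs()
+```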
+
+If you want to know details about every task, visit `rai_bench/tool_calling_agent/tasks`
diff --git a/docs/tutorials/benchmarking.md b/docs/tutorials/benchmarking.md
index fb4cb663d..deb9d20a9 100644
--- a/docs/tutorials/benchmarking.md
+++ b/docs/tutorials/benchmarking.md
@@ -53,12 +53,12 @@ If your goal is creating custom tasks and scenarios, visit [Creating Custom Task

 This benchmark does not require any additional setup besides the main one [Basic Setup](../setup/install.md), just run:

 ```bash
-python src/rai_bench/rai_bench/examples/tool_calling_agent.py --model-name <model-name> --vendor <vendor> --extra-tool-calls <5> --task-types <task-types> --out-dir <out-dir>
+python src/rai_bench/rai_bench/examples/tool_calling_agent.py --model-name <model-name> --vendor <vendor> --extra-tool-calls <0 5> --task-types basic --n-shots <0 2> --prompt-detail <prompt-detail> --complexities <complexities> --out-dir <out-dir>
 ```

 !!! note

-    This Benchmark is significantly faster, but still if just trying out, we recommend choosing just one task-type.
+    This Benchmark is significantly faster; still, if you are just trying it out, we recommend choosing just one value per flag, as every combination of parameters creates more tasks.

 ## Testing Models

@@ -90,12 +90,17 @@ if __name__ == "__main__":
         ],
         repeats=1,  # how many times to repeat
     )
-    tool_conf = ToolCallingAgentBenchmarkConfig(
-        extra_tool_calls=5,  # how many extra tool calls allowed to still pass
+    tool_conf = ToolCallingAgentBenchmarkConfig(
+        extra_tool_calls=[0, 5],  # how many extra tool calls allowed to still pass
         task_types=[  # what types of tasks to include
             "basic",
             "spatial_reasoning",
-            "manipulation",
+            "custom_interfaces",
+        ],
+        N_shots=[0, 2],  # examples in system prompt
+        prompt_detail=[  # how descriptive should task prompt be
+            "brief",
+            "descriptive",
         ],
         repeats=1,
     )
@@ -222,6 +227,21 @@ class ThrowObjectsOffTableTask(ManipulationTask):
         incorrect: int = len(selected_type_objects) - correct
         return correct, incorrect
+
+# configure existing Task with different params
+target_coords = (0.1, 0.1)
+disp = 0.1
+task = PlaceObjectAtCoordTask(
+    obj_type="apple",
+    target_position=target_coords,
+    allowable_displacement=disp,
+)
+
+Scenario(
+    task=task,
+    scene_config=scene_config,
+    scene_config_path=path_to_your_config,
+)
 ```

As `obj_types` is parameterizable, it enables many variants of this Task. In combination with the many simulation configs available, a single Task can provide dozens of scenarios, as the sketch below illustrates.
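+For example, reusing the names above to generate several scenarios from one parameterizable Task (a sketch; the extra object types are hypothetical):
+
+```python
+# one parameterizable Task x several object types = many scenarios
+scenarios = [
+    Scenario(
+        task=PlaceObjectAtCoordTask(
+            obj_type=obj,
+            target_position=target_coords,
+            allowable_displacement=disp,
+        ),
+        scene_config=scene_config,
+        scene_config_path=path_to_your_config,
+    )
+    for obj in ["apple", "carrot", "cube"]  # "carrot" and "cube" are hypothetical
+]
+```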
@@ -240,23 +260,14 @@ from rai_bench.tool_calling_agent.subtasks import (
 from rai_bench.tool_calling_agent.validators import (
     OrderedCallsValidator,
 )
-from rai_bench.tool_calling_agent.tasks.basic import BasicTask
 from rai_bench.tool_calling_agent.mocked_tools import (
     MockGetROS2TopicsNamesAndTypesTool,
 )
+from rai_bench.tool_calling_agent.interfaces import Task, TaskArgs
 from langchain_core.tools import BaseTool
 from typing import List

-# configure existing Task with different params
-target_coords = (0.1, 0.1)
-disp = 0.1
-task = PlaceObjectAtCoordTask(
-    obj_type="apple",
-    target_position=target_coords,
-    allowable_displacement=disp,
-)
-Scenario(task=task, scene_config=scene_config, scene_config_path=path_to_your_config)

 # define a subtask that requires calling the expected tool with correct arguments
 receive_robot_pos_subtask = CheckArgsToolCallSubTask(
@@ -270,7 +281,7 @@ receive_robot_pos_subtask = CheckArgsToolCallSubTask(

 topics_ord_val = OrderedCallsValidator(subtasks=[receive_robot_pos_subtask])

-class GetROS2RobotPositionTask(BasicTask):
+class GetROS2RobotPositionTask(Task):
     complexity = "easy"

     @property
@@ -287,9 +298,18 @@ class GetROS2RobotPositionTask(BasicTask):
         ),
     ]

+    def get_system_prompt(self) -> str:
+        return "You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system."
+
     def get_prompt(self) -> str:
         return "Get the position of the robot."

+    @property
+    def optional_tool_calls_number(self) -> int:
+        # Listing topics before getting any message
+        return 1
+
 # optionally pass number of extra tool calls
-task = GetROS2RobotPositionTask(validators=[topics_ord_val], extra_tool_calls=1)
+args = TaskArgs(extra_tool_calls=0)
+task = GetROS2RobotPositionTask(validators=[topics_ord_val], task_args=args)
 ```
diff --git a/src/rai_bench/pyproject.toml b/src/rai_bench/pyproject.toml
index 33d8f1365..010d8aca5 100644
--- a/src/rai_bench/pyproject.toml
+++ b/src/rai_bench/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "rai-bench"
-version = "0.1.0"
+version = "0.2.0"
 description = "Package for running and creating benchmarks."
authors = ["Jakub Matejczyk ", "Magdalena Kotynia "] readme = "README.md" diff --git a/src/rai_bench/rai_bench/examples/benchmarking_models.py b/src/rai_bench/rai_bench/examples/benchmarking_models.py index 3a3feefa3..c4008eef4 100644 --- a/src/rai_bench/rai_bench/examples/benchmarking_models.py +++ b/src/rai_bench/rai_bench/examples/benchmarking_models.py @@ -20,11 +20,11 @@ if __name__ == "__main__": # Define models you want to benchmark - model_names = ["qwen2.5:7b", "llama3.2:3b"] - vendors = ["ollama", "ollama"] + model_names = ["qwen2.5:7b"] + vendors = ["ollama"] # Define benchmarks that will be used - man_conf = ManipulationO3DEBenchmarkConfig( + mani_conf = ManipulationO3DEBenchmarkConfig( o3de_config_path="src/rai_bench/rai_bench/manipulation_o3de/predefined/configs/o3de_config.yaml", # path to your o3de config levels=[ # define what difficulty of tasks to include in benchmark "trivial", @@ -32,12 +32,16 @@ repeats=1, # how many times to repeat ) tool_conf = ToolCallingAgentBenchmarkConfig( - extra_tool_calls=5, # how many extra tool calls allowed to still pass + extra_tool_calls=[0], # how many extra tool calls allowed to still pass task_types=[ # what types of tasks to include "basic", "spatial_reasoning", + # "navigation", + "custom_interfaces", "manipulation", ], + N_shots=[2], # examples in system prompt + prompt_detail=["brief", "descriptive"], # how descriptive should task prompt be repeats=1, ) @@ -45,6 +49,6 @@ test_models( model_names=model_names, vendors=vendors, - benchmark_configs=[man_conf, tool_conf], + benchmark_configs=[tool_conf], out_dir=out_dir, ) diff --git a/src/rai_bench/rai_bench/examples/tool_calling_agent.py b/src/rai_bench/rai_bench/examples/tool_calling_agent.py index 321ca9fbb..12ae38d56 100644 --- a/src/rai_bench/rai_bench/examples/tool_calling_agent.py +++ b/src/rai_bench/rai_bench/examples/tool_calling_agent.py @@ -34,6 +34,8 @@ extra_tool_calls=args.extra_tool_calls, complexities=args.complexities, task_types=args.task_types, + n_shots=args.n_shots, + prompt_detail=args.prompt_detail, ) for task in tasks: task.set_logger(bench_logger) diff --git a/src/rai_bench/rai_bench/results_processing/data_loading.py b/src/rai_bench/rai_bench/results_processing/data_loading.py index fee969867..e33c3665a 100644 --- a/src/rai_bench/rai_bench/results_processing/data_loading.py +++ b/src/rai_bench/rai_bench/results_processing/data_loading.py @@ -70,20 +70,19 @@ def convert_row_to_task_result(row: pd.Series) -> TaskResult: ) validator_results.append(validator_result) - return TaskResult( - task_prompt=row["task_prompt"], - system_prompt=row["system_prompt"], - complexity=row["complexity"], - type=row["type"], - model_name=row["model_name"], - validation_info=validator_results, - extra_tool_calls=int(row["extra_tool_calls"]), - extra_tool_calls_used=int(row["extra_tool_calls_used"]), - score=float(row["score"]), - total_time=float(row["total_time"]), - run_id=uuid.UUID(row["run_id"]), + row.update( + { + "validation_info": validator_results, + "extra_tool_calls": int(row["extra_tool_calls"]), + "extra_tool_calls_used": int(row["extra_tool_calls_used"]), + "score": float(row["score"]), + "total_time": float(row["total_time"]), + "run_id": uuid.UUID(row["run_id"]), + } ) + return TaskResult(**row) + def convert_row_to_scenario_result(row: pd.Series) -> ScenarioResult: """ @@ -100,10 +99,7 @@ def convert_row_to_scenario_result(row: pd.Series) -> ScenarioResult: A ScenarioResult object """ return ScenarioResult( - task_prompt=row["task_prompt"], - 
 system_prompt=row["system_prompt"],
-        model_name=row["model_name"],
-        scene_config_path=row["scene_config_path"],
+        # drop the fields passed explicitly below to avoid duplicate kwargs
+        **row.drop(["score", "total_time", "number_of_tool_calls"]),
         score=float(row["score"]),
         total_time=float(row["total_time"]),
         number_of_tool_calls=int(row["number_of_tool_calls"]),
diff --git a/src/rai_bench/rai_bench/results_processing/data_processing.py b/src/rai_bench/rai_bench/results_processing/data_processing.py
index 755d3ded4..84073ad63 100644
--- a/src/rai_bench/rai_bench/results_processing/data_processing.py
+++ b/src/rai_bench/rai_bench/results_processing/data_processing.py
@@ -181,10 +181,14 @@ def create_task_metrics_dataframe(

 def create_task_details_dataframe(
-    model_results: ModelResults, task_type: Optional[str] = None
+    model_results: ModelResults,
+    task_type: Optional[str] = None,
+    complexity: Optional[str] = None,
+    examples_in_system_prompt: Optional[int] = None,
+    prompt_detail: Optional[str] = None,
 ) -> pd.DataFrame:
     """
-    Create a DataFrame with task details, optionally filtered by task type.
+    Create a DataFrame with task details, optionally filtered by multiple criteria.

     Parameters
     ----------
@@ -192,6 +196,10 @@
         The model results object
     task_type : Optional[str]
         Task type to filter by
+    complexity : Optional[str]
+        Complexity to filter by
+    examples_in_system_prompt : Optional[int]
+        Examples in system prompt to filter by
+    prompt_detail : Optional[str]
+        Prompt detail level to filter by

     Returns
     -------
@@ -201,14 +209,30 @@
     all_detailed_results = get_all_detailed_results_from_model_results(
         model_results=model_results
     )
-
     if not all_detailed_results:
         return pd.DataFrame()

-    # filter by task type
+    # Apply filters
     if task_type:
         all_detailed_results = [r for r in all_detailed_results if r.type == task_type]
+    if complexity:
+        all_detailed_results = [
+            r for r in all_detailed_results if r.complexity == complexity
+        ]
+
+    # 0 is a valid value here, so compare against None explicitly
+    if examples_in_system_prompt is not None:
+        all_detailed_results = [
+            r
+            for r in all_detailed_results
+            if r.examples_in_system_prompt == examples_in_system_prompt
+        ]
+
+    if prompt_detail:
+        all_detailed_results = [
+            r for r in all_detailed_results if r.prompt_detail == prompt_detail
+        ]
+
     rows: List[Dict[str, Any]] = [
         {
             "task_prompt": result.task_prompt,
@@ -217,10 +241,10 @@
             "score": result.score,
             "total_time": result.total_time,
             "extra_tool_calls_used": result.extra_tool_calls_used,
+            "examples_in_system_prompt": result.examples_in_system_prompt,
         }
         for result in all_detailed_results
     ]
-
     return pd.DataFrame(rows)
diff --git a/src/rai_bench/rai_bench/results_processing/visualise/tool_calling_agent_display.py b/src/rai_bench/rai_bench/results_processing/visualise/tool_calling_agent_display.py
index 98f42daec..558a7626f 100644
--- a/src/rai_bench/rai_bench/results_processing/visualise/tool_calling_agent_display.py
+++ b/src/rai_bench/rai_bench/results_processing/visualise/tool_calling_agent_display.py
@@ -100,20 +100,20 @@ def display_task_type_performance(model_results: ModelResults):
     st.plotly_chart(fig_type_calls, use_container_width=True)  # type: ignore

-def display_task_complexity_performance(model_results: ModelResults):
-    """Display performance charts by task complexity."""
-    complexity_df = create_task_metrics_dataframe(model_results, "complexity")
+def display_task_performance_by_field(model_results: ModelResults, field: str):
+    """Display performance charts grouped by the given task field."""
+    metric_df = create_task_metrics_dataframe(model_results, field)

-    if complexity_df.empty:
-        st.warning("No complexity data available.")
+    if metric_df.empty:
+        st.warning(f"No {field} data available.")
         return

     fig_complexity_score = create_bar_chart(
-        df=complexity_df,
-        x_column="complexity",
+        df=metric_df,
+        x_column=field,
         y_column="avg_score",
-        title="Success Rate by Task Complexity",
-        x_label="Task Complexity",
+        title=f"Success Rate by {field}",
+        x_label=field,
         y_label="Avg Score",
         y_range=(0.0, 1.0),
         count_column="total_tasks",
@@ -121,63 +121,43 @@
     st.plotly_chart(fig_complexity_score, use_container_width=True)  # type: ignore

     fig_complexity_calls = create_bar_chart(
-        df=complexity_df,
-        x_column="complexity",
+        df=metric_df,
+        x_column=field,
         y_column="avg_extra_tool_calls",
-        title="Avg Extra Tool Calls Used by Task Complexity",
-        x_label="Task Complexity",
+        title=f"Avg Extra Tool Calls Used by {field}",
+        x_label=field,
         y_label="Avg Extra Tool Calls Used",
         count_column="total_tasks",
     )
     st.plotly_chart(fig_complexity_calls, use_container_width=True)  # type: ignore

-def display_detailed_task_type_analysis(
-    model_results: ModelResults, selected_type: str
+def display_detailed_task_analysis(
+    model_results: ModelResults,
+    selected_type: str,
+    selected_complexity: str,
+    selected_example_num: str,
+    selected_prompt_detail: str,
 ):
-    """Display detailed analysis for a specific task type."""
-    # first, get only the tasks of the selected type
-    tasks_for_type_df = create_task_details_dataframe(model_results, selected_type)
-    if tasks_for_type_df.empty:
-        st.warning(f"No tasks of type {selected_type} found.")
-        return
-    # Now aggregate by complexity for that type
-    filtered_by_complexity = (
-        tasks_for_type_df.groupby("complexity")  # type: ignore
-        .agg(
-            avg_score=("score", "mean"),
-            avg_time=("total_time", "mean"),
-            avg_extra_tool_calls=("extra_tool_calls_used", "mean"),
-        )
-        .reset_index()
+    """Display detailed analysis for tasks matching the selected filters."""
+    # get only the tasks matching the selected filters ("All" disables a filter)
+    tasks_df = create_task_details_dataframe(
+        model_results,
+        task_type=selected_type if selected_type != "All" else None,
+        complexity=selected_complexity if selected_complexity != "All" else None,
+        examples_in_system_prompt=(
+            int(selected_example_num) if selected_example_num != "All" else None
+        ),
+        prompt_detail=(
+            selected_prompt_detail if selected_prompt_detail != "All" else None
+        ),
     )
-    filtered_by_complexity = filtered_by_complexity[
-        filtered_by_complexity["complexity"].notna()
-    ]
-
-    # Display success rate by complexity for the selected task type
-    if not filtered_by_complexity.empty:
-        fig_complexity_score = create_bar_chart(
-            df=filtered_by_complexity,
-            x_column="complexity",
-            y_column="avg_score",
-            title=f"Success Rate by Task Complexity for '{selected_type}' Tasks",
-            x_label="Task Complexity",
-            y_label="Avg Score",
-            y_range=(0.0, 1.0),
-            count_column="total_tasks",
-        )
-        st.plotly_chart(fig_complexity_score, use_container_width=True)  # type: ignore
-
-    # Display success rate by individual task
-    task_details_df = create_task_details_dataframe(model_results, selected_type)
-
-    if task_details_df.empty:
-        st.warning(f"No task details available for type: {selected_type}")
+    if tasks_df.empty:
+        st.warning("No tasks found for the selected filters.")
         return

     task_stats = (
-        task_details_df.groupby("task_prompt")  # type: ignore
+        tasks_df.groupby("task_prompt")  # type: ignore
         .agg({"score": "mean", "total_time": "mean"})
         .reset_index()
     )
@@ -200,7 +180,7 @@
         df=task_stats,
         x_column="task_prompt",
         y_column="score",
-        title=f"Avg Score for '{selected_type}' Tasks",
+        title="Avg Score",
         x_label="Task",
         y_label="Avg Score",
         custom_data=["wrapped_prompt", "score"],
@@ -216,7 +196,7 @@ def display_detailed_task_type_analysis(
         df=task_stats,
         x_column="task_prompt",
         y_column="total_time",
-        title=f"Avg Time for '{selected_type}' Tasks",
+        title="Avg Time",
         x_label="Task",
         y_label="Avg Time (s)",
         custom_data=["wrapped_prompt", "total_time"],
@@ -349,20 +329,58 @@ def render_task_performance_tab(bench_results: BenchmarkResults):

     # Display performance by complexity
     st.subheader("Performance by Task Complexity")
-    display_task_complexity_performance(model_results)
+    display_task_performance_by_field(model_results, "complexity")
+
+    # Display performance by system prompt examples
+    st.subheader("Performance by system prompt examples")
+    display_task_performance_by_field(model_results, "examples_in_system_prompt")
+
+    # Display performance by prompt detail
+    st.subheader("Performance by Task's prompt detail")
+    display_task_performance_by_field(model_results, "prompt_detail")

-    # Per Task Type Analysis
     st.subheader("Detailed Task Type Analysis")
     task_types = get_unique_values_from_results(model_results, "type")
-
     if not task_types:
         st.warning("No task types available.")
         return

     selected_type = st.selectbox(
-        "Select Task Type", sorted(task_types), key="task_type"
+        "Select Task Type", ["All"] + sorted(task_types), key="task_type"
+    )
+
+    # Add selectboxes for the additional attributes with "All" as default
+    complexity_values = get_unique_values_from_results(model_results, "complexity")
+    selected_complexity = st.selectbox(
+        "Select Complexity",
+        ["All"] + complexity_values,
+        key="complexity_select",
+    )
+
+    examples_values = get_unique_values_from_results(
+        model_results, "examples_in_system_prompt"
+    )
+    selected_examples = st.selectbox(
+        "Select Examples in System Prompt",
+        ["All"] + sorted(examples_values),
+        key="n_shots_select",
+    )
+    prompt_detail_values = get_unique_values_from_results(
+        model_results, "prompt_detail"
+    )
+    selected_prompt_detail = st.selectbox(
+        "Select prompt descriptiveness",
+        ["All"] + prompt_detail_values,
+        key="prompt_detail_select",
+    )
+
+    display_detailed_task_analysis(
+        model_results,
+        selected_type,
+        selected_complexity,
+        selected_examples,
+        selected_prompt_detail,
     )
-    display_detailed_task_type_analysis(model_results, selected_type)

 def render_validator_analysis_tab(bench_results: BenchmarkResults):
diff --git a/src/rai_bench/rai_bench/test_models.py b/src/rai_bench/rai_bench/test_models.py
index 51f1c7cb5..def97016c 100644
--- a/src/rai_bench/rai_bench/test_models.py
+++ b/src/rai_bench/rai_bench/test_models.py
@@ -40,7 +40,17 @@ def name(self) -> str:

 class ManipulationO3DEBenchmarkConfig(BenchmarkConfig):
-    # by default include all
+    """Configuration for Manipulation O3DE Benchmark.
+
+    Parameters
+    ----------
+    o3de_config_path : str
+        path to O3DE configuration file
+    levels : List[Literal["trivial", "easy", "medium", "hard", "very_hard"]], optional
+        difficulty levels to include in benchmark, by default all levels are included:
+        ["trivial", "easy", "medium", "hard", "very_hard"]
+    """
+
     o3de_config_path: str
     levels: List[Literal["trivial", "easy", "medium", "hard", "very_hard"]] = [
         "trivial",
@@ -56,8 +66,32 @@ def name(self) -> str:

 class ToolCallingAgentBenchmarkConfig(BenchmarkConfig):
-    extra_tool_calls: int = 0
+    """Configuration for Tool Calling Agent Benchmark.
+ + Parameters + ---------- + extra_tool_calls : List[int], optional + how many extra tool calls allowed to still pass, by default [0] + prompt_detail : List[Literal["brief", "descriptive"]], optional + how descriptive should task prompt be, by default all levels are included: + ["brief", "descriptive"] + N_shots : List[Literal[0, 2, 5]], optional + how many examples are in system prompt, by default all are included: [0, 2, 5] + complexities : List[Literal["easy", "medium", "hard"]], optional + complexity levels of tasks to include in the benchmark, by default all levels are included: + ["easy", "medium", "hard"] + task_types : List[Literal["basic", "manipulation", "navigation", "custom_interfaces", "spatial_reasoning"]], optional + types of tasks to include in the benchmark, by default all types are included: + ["basic", "manipulation", "navigation", "custom_interfaces", "spatial_reasoning"] + + For more detailed explanation of parameters, see the documentation: + (https://robotecai.github.io/rai/simulation_and_benchmarking/rai_bench/) + """ + + extra_tool_calls: List[int] = [0] complexities: List[Literal["easy", "medium", "hard"]] = ["easy", "medium", "hard"] + N_shots: List[Literal[0, 2, 5]] = [0, 2, 5] + prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"] task_types: List[ Literal[ "basic", @@ -154,6 +188,7 @@ def test_models( out_dir: str, additional_model_args: Optional[List[Dict[str, Any]]] = None, ): + # TODO (jmatejcz) add docstring after passing agent logic will be added if additional_model_args is None: additional_model_args = [{} for _ in model_names] @@ -189,6 +224,8 @@ def test_models( tool_calling_tasks = tool_calling_agent.get_tasks( extra_tool_calls=bench_conf.extra_tool_calls, complexities=bench_conf.complexities, + prompt_detail=bench_conf.prompt_detail, + n_shots=bench_conf.N_shots, task_types=bench_conf.task_types, ) tool_calling_agent.run_benchmark( diff --git a/src/rai_bench/rai_bench/tool_calling_agent/benchmark.py b/src/rai_bench/rai_bench/tool_calling_agent/benchmark.py index 32547b90c..4328c0a8d 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/benchmark.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/benchmark.py @@ -160,8 +160,10 @@ def run_next(self, agent: CompiledStateGraph, experiment_id: uuid.UUID) -> None: self.logger.info(f"TASK SCORE: {score}, TOTAL TIME: {total_time:.3f}") task_result = TaskResult( - task_prompt=task.get_prompt(), + task_prompt=task.get_base_prompt(), system_prompt=task.get_system_prompt(), + examples_in_system_prompt=task.n_shots, + prompt_detail=task.prompt_detail, type=task.type, extra_tool_calls=task.extra_tool_calls, extra_tool_calls_used=total_extra_calls, diff --git a/src/rai_bench/rai_bench/tool_calling_agent/interfaces.py b/src/rai_bench/rai_bench/tool_calling_agent/interfaces.py index 65ae83b25..e35d5a6f4 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/interfaces.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/interfaces.py @@ -19,6 +19,7 @@ from langchain_core.messages import AIMessage, BaseMessage, ToolCall from langchain_core.runnables.config import DEFAULT_RECURSION_LIMIT from langchain_core.tools import BaseTool +from pydantic import BaseModel from rai_bench.tool_calling_agent.results_tracking import SubTaskResult, ValidatorResult @@ -454,14 +455,33 @@ def validate(self, tool_calls: List[ToolCall]) -> Tuple[bool, List[ToolCall]]: pass +class TaskArgs(BaseModel): + """Holds the configurations specified by user + + Parameters + ---------- + extra_tool_calls : int, optional 
+        how many extra tool calls allowed to still pass, by default 0
+    prompt_detail : Literal["brief", "descriptive"], optional
+        how descriptive should task prompt be, by default "brief"
+    examples_in_system_prompt : Literal[0, 2, 5], optional
+        how many examples are in system prompt, by default 0
+    """
+
+    extra_tool_calls: int = 0
+    prompt_detail: Literal["brief", "descriptive"] = "brief"
+    examples_in_system_prompt: Literal[0, 2, 5] = 0
+
+
 class Task(ABC):
     complexity: Literal["easy", "medium", "hard"]
+    type: str
     recursion_limit: int = DEFAULT_RECURSION_LIMIT

     def __init__(
         self,
         validators: List[Validator],
-        extra_tool_calls: int = 0,
+        task_args: TaskArgs,
         logger: loggers_type | None = None,
     ) -> None:
         """
@@ -473,21 +493,30 @@ def __init__(

         Attributes
         ----------
+        complexity : Literal["easy", "medium", "hard"]
+            difficulty level of the task
+        type : str
+            type identifier for the task
+        recursion_limit : int, optional
+            maximum recursion depth allowed, by default DEFAULT_RECURSION_LIMIT
+
+        Parameters
+        ----------
         validators : List[Validator]
             List of validators that will be applied in sequence.
-        extra_tool_calls : int
-            Number of additional tool calls allowed beyond the minimum required.
+        task_args : TaskArgs
+            Configuration parameters for the task, specified by the user
         logger : logging.Logger
             Logger for recording task validation results and errors.
-        result : Result
-            Object tracking the validation results across all validators.
         """
         if logger:
             self.logger = logger
         else:
             self.logger = logging.getLogger(__name__)
         self.validators = validators
-        self.extra_tool_calls = extra_tool_calls
+        self.extra_tool_calls = task_args.extra_tool_calls
+        self.prompt_detail = task_args.prompt_detail
+        self.n_shots = task_args.examples_in_system_prompt

     def set_logger(self, logger: loggers_type):
         self.logger = logger
@@ -523,16 +552,25 @@ def available_tools(self) -> List[BaseTool]:

     @property
     @abstractmethod
-    def type(self) -> str:
-        """Type of task, for example: manipulation"""
+    def optional_tool_calls_number(self) -> int:
+        """Optional tool calls are calls that are not considered an error,
+        for example listing topics at the beginning."""
         pass

     @property
     def max_tool_calls_number(self) -> int:
-        return self.required_calls + self.extra_tool_calls
+        """Maximum number of tool calls to still pass the task.
+        Includes the extra tool calls parameter
+        and the task-dependent number of optional tool calls.
+        """
+        return (
+            self.required_calls
+            + self.optional_tool_calls_number
+            + self.extra_tool_calls
+        )

     @property
-    def required_calls(self):
+    def required_calls(self) -> int:
         """Minimal number of calls required to complete task"""
         total = 0
         for val in self.validators:
@@ -550,6 +588,14 @@ def get_system_prompt(self) -> str:
         """
         pass

+    @abstractmethod
+    def get_base_prompt(self) -> str:
+        """
+        Get the base task instruction;
+        it will be used to identify the task in results processing
+        """
+        pass
+
     @abstractmethod
     def get_prompt(self) -> str:
         """Get the task instruction - the prompt that will be passed to agent.
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/mocked_ros2_interfaces.py b/src/rai_bench/rai_bench/tool_calling_agent/mocked_ros2_interfaces.py
new file mode 100644
index 000000000..f9758499f
--- /dev/null
+++ b/src/rai_bench/rai_bench/tool_calling_agent/mocked_ros2_interfaces.py
@@ -0,0 +1,3400 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Type + +from pydantic import BaseModel +from rai.types import ( + CameraInfo, + Image, +) +from rai.types.rai_interfaces import ( + ManipulatorMoveToRequest, + RAIDetectionArray, + RAIGroundedSamRequest, + RAIGroundingDinoRequest, +) + +from rai_bench.tool_calling_agent.messages.actions import ( + AssistedTeleopGoal, + BackUpGoal, + ComputePathThroughPosesGoal, + ComputePathToPoseGoal, + DriveOnHeadingGoal, + FollowPathGoal, + FollowWaypointsGoal, + NavigateThroughPosesGoal, + NavigateToPoseGoal, + SmoothPathGoal, + SpinGoal, + WaitGoal, +) +from rai_bench.tool_calling_agent.messages.base import Clock +from rai_bench.tool_calling_agent.messages.services import ( + StringListRequest, + VectorStoreRetrievalRequest, + WhatISeeRequest, +) +from rai_bench.tool_calling_agent.messages.topics import AudioMessage, HRIMessage + +# dict of interfaces where keys are interfaces types and values are output +# of GetROS2MessageInterfaceTool which are same as ros2 interface show outputs +# the dict contains custom as well as couple other common interfaces + +COMMON_INTERFACES: Dict[str, str] = { + "sensor_msgs/msg/CameraInfo": """ +# This message defines meta information for a camera. It should be in a +# camera namespace on topic "camera_info" and accompanied by up to five +# image topics named: +# +# image_raw - raw data from the camera driver, possibly Bayer encoded +# image - monochrome, distorted +# image_color - color, distorted +# image_rect - monochrome, rectified +# image_rect_color - color, rectified +# +# The image_pipeline contains packages (image_proc, stereo_image_proc) +# for producing the four processed image topics from image_raw and +# camera_info. The meaning of the camera parameters are described in +# detail at http://www.ros.org/wiki/image_pipeline/CameraInfo. +# +# The image_geometry package provides a user-friendly interface to +# common operations using this meta information. If you want to, e.g., +# project a 3d point into image coordinates, we strongly recommend +# using image_geometry. +# +# If the camera is uncalibrated, the matrices D, K, R, P should be left +# zeroed out. In particular, clients may assume that K[0] == 0.0 +# indicates an uncalibrated camera. + +####################################################################### +# Image acquisition info # +####################################################################### + +# Time of image acquisition, camera coordinate frame ID +std_msgs/Header header # Header timestamp should be acquisition time of image + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of camera + # +x should point to the right in the image + # +y should point down in the image + # +z should point into the plane of the image + + +####################################################################### +# Calibration Parameters # +####################################################################### +# These are fixed during camera calibration. 
Their values will be the # +# same in all messages until the camera is recalibrated. Note that # +# self-calibrating systems may "recalibrate" frequently. # +# # +# The internal parameters can be used to warp a raw (distorted) image # +# to: # +# 1. An undistorted image (requires D and K) # +# 2. A rectified image (requires D, K, R) # +# The projection matrix P projects 3D points into the rectified image.# +####################################################################### + +# The image dimensions with which the camera was calibrated. +# Normally this will be the full camera resolution in pixels. +uint32 height +uint32 width + +# The distortion model used. Supported models are listed in +# sensor_msgs/distortion_models.hpp. For most cameras, "plumb_bob" - a +# simple model of radial and tangential distortion - is sufficent. +string distortion_model + +# The distortion parameters, size depending on the distortion model. +# For "plumb_bob", the 5 parameters are: (k1, k2, t1, t2, k3). +float64[] d + +# Intrinsic camera matrix for the raw (distorted) images. +# [fx 0 cx] +# K = [ 0 fy cy] +# [ 0 0 1] +# Projects 3D points in the camera coordinate frame to 2D pixel +# coordinates using the focal lengths (fx, fy) and principal point +# (cx, cy). +float64[9] k # 3x3 row-major matrix + +# Rectification matrix (stereo cameras only) +# A rotation matrix aligning the camera coordinate system to the ideal +# stereo image plane so that epipolar lines in both stereo images are +# parallel. +float64[9] r # 3x3 row-major matrix + +# Projection/camera matrix +# [fx' 0 cx' Tx] +# P = [ 0 fy' cy' Ty] +# [ 0 0 1 0] +# By convention, this matrix specifies the intrinsic (camera) matrix +# of the processed (rectified) image. That is, the left 3x3 portion +# is the normal camera intrinsic matrix for the rectified image. +# It projects 3D points in the camera coordinate frame to 2D pixel +# coordinates using the focal lengths (fx', fy') and principal point +# (cx', cy') - these may differ from the values in K. +# For monocular cameras, Tx = Ty = 0. Normally, monocular cameras will +# also have R = the identity and P[1:3,1:3] = K. +# For a stereo pair, the fourth column [Tx Ty 0]' is related to the +# position of the optical center of the second camera in the first +# camera's frame. We assume Tz = 0 so both cameras are in the same +# stereo image plane. The first camera always has Tx = Ty = 0. For +# the right (second) camera of a horizontal stereo pair, Ty = 0 and +# Tx = -fx' * B, where B is the baseline between the cameras. +# Given a 3D point [X Y Z]', the projection (x, y) of the point onto +# the rectified image is given by: +# [u v w]' = P * [X Y Z 1]' +# x = u / w +# y = v / w +# This holds for both images of a stereo pair. +float64[12] p # 3x4 row-major matrix + + +####################################################################### +# Operational Parameters # +####################################################################### +# These define the image region actually captured by the camera # +# driver. Although they affect the geometry of the output image, they # +# may be changed freely without recalibrating the camera. # +####################################################################### + +# Binning refers here to any camera setting which combines rectangular +# neighborhoods of pixels into larger "super-pixels." It reduces the +# resolution of the output image to +# (width / binning_x) x (height / binning_y). 
+# The default values binning_x = binning_y = 0 is considered the same +# as binning_x = binning_y = 1 (no subsampling). +uint32 binning_x +uint32 binning_y + +# Region of interest (subwindow of full camera resolution), given in +# full resolution (unbinned) image coordinates. A particular ROI +# always denotes the same window of pixels on the camera sensor, +# regardless of binning settings. +# The default setting of roi (all values 0) is considered the same as +# full resolution (roi.width = width, roi.height = height). +RegionOfInterest roi + # + uint32 x_offset # + # (0 if the ROI includes the left edge of the image) + uint32 y_offset # + # (0 if the ROI includes the top edge of the image) + uint32 height # + uint32 width # + bool do_rectify +""", + "sensor_msgs/msg/Image": """ +# This message contains an uncompressed image +# (0, 0) is at top-left corner of image + +std_msgs/Header header # Header timestamp should be acquisition time of image + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + +uint32 height # image height, that is, number of rows +uint32 width # image width, that is, number of columns + +# The legal values for encoding are in file src/image_encodings.cpp +# If you want to standardize a new string format, join +# ros-users@lists.ros.org and send an email proposing a new encoding. + +string encoding # Encoding of pixels -- channel meaning, ordering, size + # taken from the list of strings in include/sensor_msgs/image_encodings.hpp + +uint8 is_bigendian # is this data bigendian? +uint32 step # Full row length in bytes +uint8[] data # actual matrix data, size is (step * rows) +""", + "rosgraph_msgs/msg/Clock": """ +# This message communicates the current time. +# +# For more information, see https://design.ros2.org/articles/clock_and_time.html. 
+builtin_interfaces/Time clock + int32 sec + uint32 nanosec +""", +} +MANIPULATION_INTERFACES: Dict[str, str] = { + "moveit_msgs/action/ExecuteTrajectory": """# The trajectory to execute +RobotTrajectory trajectory + trajectory_msgs/JointTrajectory joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + trajectory_msgs/MultiDOFJointTrajectory multi_dof_joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + MultiDOFJointTrajectoryPoint[] points + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + +--- + +# Error code - encodes the overall reason for failure +MoveItErrorCodes error_code + int32 val + int32 SUCCESS=1 + int32 FAILURE=99999 + int32 PLANNING_FAILED=-1 + int32 INVALID_MOTION_PLAN=-2 + int32 MOTION_PLAN_INVALIDATED_BY_ENVIRONMENT_CHANGE=-3 + int32 CONTROL_FAILED=-4 + int32 UNABLE_TO_AQUIRE_SENSOR_DATA=-5 + int32 TIMED_OUT=-6 + int32 PREEMPTED=-7 + int32 START_STATE_IN_COLLISION=-10 + int32 START_STATE_VIOLATES_PATH_CONSTRAINTS=-11 + int32 START_STATE_INVALID=-26 + int32 GOAL_IN_COLLISION=-12 + int32 GOAL_VIOLATES_PATH_CONSTRAINTS=-13 + int32 GOAL_CONSTRAINTS_VIOLATED=-14 + int32 GOAL_STATE_INVALID=-27 + int32 UNRECOGNIZED_GOAL_TYPE=-28 + int32 INVALID_GROUP_NAME=-15 + int32 INVALID_GOAL_CONSTRAINTS=-16 + int32 INVALID_ROBOT_STATE=-17 + int32 INVALID_LINK_NAME=-18 + int32 INVALID_OBJECT_NAME=-19 + int32 FRAME_TRANSFORM_FAILURE=-21 + int32 COLLISION_CHECKING_UNAVAILABLE=-22 + int32 ROBOT_STATE_STALE=-23 + int32 SENSOR_INFO_STALE=-24 + int32 COMMUNICATION_FAILURE=-25 + int32 CRASH=-29 + int32 ABORT=-30 + int32 NO_IK_SOLUTION=-31 + +--- + +# The internal state that the move group action currently is in +string state} +""", + "moveit_msgs/action/MoveGroup": """# Motion planning request to pass to planner +MotionPlanRequest request + WorkspaceParameters workspace_parameters + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Vector3 min_corner + float64 x + float64 y + float64 z + geometry_msgs/Vector3 max_corner + float64 x + float64 y + float64 z + RobotState start_state + sensor_msgs/JointState joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] name + float64[] position + float64[] velocity + float64[] effort + sensor_msgs/MultiDOFJointState multi_dof_joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + 
geometry_msgs/Twist[] twist + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Wrench[] wrench + Vector3 force + float64 x + float64 y + float64 z + Vector3 torque + float64 x + float64 y + float64 z + AttachedCollisionObject[] attached_collision_objects + string link_name + CollisionObject object + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string id + object_recognition_msgs/ObjectType type + string key + string db + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Plane[] planes + # + float64[4] coef + geometry_msgs/Pose[] plane_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string[] subframe_names + geometry_msgs/Pose[] subframe_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + byte ADD=0 + byte REMOVE=1 + byte APPEND=2 + byte MOVE=3 + byte operation + string[] touch_links + trajectory_msgs/JointTrajectory detach_posture + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + float64 weight + bool is_diff + Constraints[] goal_constraints + string name + JointConstraint[] joint_constraints + string joint_name + float64 position + float64 tolerance_above + float64 tolerance_below + float64 weight + PositionConstraint[] position_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string link_name + geometry_msgs/Vector3 target_point_offset + float64 x + float64 y + float64 z + BoundingVolume constraint_region + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position 
+ float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 weight + OrientationConstraint[] orientation_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string link_name + float64 absolute_x_axis_tolerance + float64 absolute_y_axis_tolerance + float64 absolute_z_axis_tolerance + uint8 parameterization + uint8 XYZ_EULER_ANGLES=0 + uint8 ROTATION_VECTOR=1 + float64 weight + VisibilityConstraint[] visibility_constraints + float64 target_radius + geometry_msgs/PoseStamped target_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + int32 cone_sides + geometry_msgs/PoseStamped sensor_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 max_view_angle + float64 max_range_angle + uint8 SENSOR_Z=0 + uint8 SENSOR_Y=1 + uint8 SENSOR_X=2 + uint8 sensor_view_direction + float64 weight + Constraints path_constraints + string name + JointConstraint[] joint_constraints + string joint_name + float64 position + float64 tolerance_above + float64 tolerance_below + float64 weight + PositionConstraint[] position_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string link_name + geometry_msgs/Vector3 target_point_offset + float64 x + float64 y + float64 z + BoundingVolume constraint_region + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 weight + OrientationConstraint[] orientation_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string link_name + float64 absolute_x_axis_tolerance + float64 absolute_y_axis_tolerance + float64 absolute_z_axis_tolerance 
+ uint8 parameterization + uint8 XYZ_EULER_ANGLES=0 + uint8 ROTATION_VECTOR=1 + float64 weight + VisibilityConstraint[] visibility_constraints + float64 target_radius + geometry_msgs/PoseStamped target_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + int32 cone_sides + geometry_msgs/PoseStamped sensor_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 max_view_angle + float64 max_range_angle + uint8 SENSOR_Z=0 + uint8 SENSOR_Y=1 + uint8 SENSOR_X=2 + uint8 sensor_view_direction + float64 weight + TrajectoryConstraints trajectory_constraints + Constraints[] constraints + string name + JointConstraint[] joint_constraints + string joint_name + float64 position + float64 tolerance_above + float64 tolerance_below + float64 weight + PositionConstraint[] position_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string link_name + geometry_msgs/Vector3 target_point_offset + float64 x + float64 y + float64 z + BoundingVolume constraint_region + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 weight + OrientationConstraint[] orientation_constraints + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string link_name + float64 absolute_x_axis_tolerance + float64 absolute_y_axis_tolerance + float64 absolute_z_axis_tolerance + uint8 parameterization + uint8 XYZ_EULER_ANGLES=0 + uint8 ROTATION_VECTOR=1 + float64 weight + VisibilityConstraint[] visibility_constraints + float64 target_radius + geometry_msgs/PoseStamped target_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + int32 cone_sides + geometry_msgs/PoseStamped sensor_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64 
max_view_angle + float64 max_range_angle + uint8 SENSOR_Z=0 + uint8 SENSOR_Y=1 + uint8 SENSOR_X=2 + uint8 sensor_view_direction + float64 weight + GenericTrajectory[] reference_trajectories + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + trajectory_msgs/JointTrajectory[] joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + moveit_msgs/CartesianTrajectory[] cartesian_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string tracked_frame + CartesianTrajectoryPoint[] points + CartesianPoint point + geometry_msgs/Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist velocity + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Accel acceleration + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + string pipeline_id + string planner_id + string group_name + int32 num_planning_attempts + float64 allowed_planning_time + float64 max_velocity_scaling_factor + float64 max_acceleration_scaling_factor + string cartesian_speed_end_effector_link + float64 max_cartesian_speed # + +# Planning options +PlanningOptions planning_options + PlanningScene planning_scene_diff + string name + RobotState robot_state + sensor_msgs/JointState joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] name + float64[] position + float64[] velocity + float64[] effort + sensor_msgs/MultiDOFJointState multi_dof_joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] twist + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Wrench[] wrench + Vector3 force + float64 x + float64 y + float64 z + Vector3 torque + float64 x + float64 y + float64 z + AttachedCollisionObject[] attached_collision_objects + string link_name + CollisionObject object + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string id + object_recognition_msgs/ObjectType type + string key + string db + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + 
Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Plane[] planes + # + float64[4] coef + geometry_msgs/Pose[] plane_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string[] subframe_names + geometry_msgs/Pose[] subframe_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + byte ADD=0 + byte REMOVE=1 + byte APPEND=2 + byte MOVE=3 + byte operation + string[] touch_links + trajectory_msgs/JointTrajectory detach_posture + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + float64 weight + bool is_diff + string robot_model_name + geometry_msgs/TransformStamped[] fixed_frame_transforms + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string child_frame_id + Transform transform + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + AllowedCollisionMatrix allowed_collision_matrix + string[] entry_names + AllowedCollisionEntry[] entry_values + bool[] enabled + string[] default_entry_names + bool[] default_entry_values + LinkPadding[] link_padding + string link_name + float64 padding + LinkScale[] link_scale + string link_name + float64 scale + ObjectColor[] object_colors + string id + std_msgs/ColorRGBA color + float32 r + float32 g + float32 b + float32 a + PlanningSceneWorld world + CollisionObject[] collision_objects + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string id + object_recognition_msgs/ObjectType type + string key + string db + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + 
Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Plane[] planes + # + float64[4] coef + geometry_msgs/Pose[] plane_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string[] subframe_names + geometry_msgs/Pose[] subframe_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + byte ADD=0 + byte REMOVE=1 + byte APPEND=2 + byte MOVE=3 + byte operation + octomap_msgs/OctomapWithPose octomap + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Pose origin + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + octomap_msgs/Octomap octomap + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + bool binary + string id + float64 resolution + int8[] data + bool is_diff + bool plan_only + bool look_around + int32 look_around_attempts + float64 max_safe_execution_cost + bool replan + int32 replan_attempts + float64 replan_delay + +--- + +# An error code reflecting what went wrong +MoveItErrorCodes error_code + int32 val + int32 SUCCESS=1 + int32 FAILURE=99999 + int32 PLANNING_FAILED=-1 + int32 INVALID_MOTION_PLAN=-2 + int32 MOTION_PLAN_INVALIDATED_BY_ENVIRONMENT_CHANGE=-3 + int32 CONTROL_FAILED=-4 + int32 UNABLE_TO_AQUIRE_SENSOR_DATA=-5 + int32 TIMED_OUT=-6 + int32 PREEMPTED=-7 + int32 START_STATE_IN_COLLISION=-10 + int32 START_STATE_VIOLATES_PATH_CONSTRAINTS=-11 + int32 START_STATE_INVALID=-26 + int32 GOAL_IN_COLLISION=-12 + int32 GOAL_VIOLATES_PATH_CONSTRAINTS=-13 + int32 GOAL_CONSTRAINTS_VIOLATED=-14 + int32 GOAL_STATE_INVALID=-27 + int32 UNRECOGNIZED_GOAL_TYPE=-28 + int32 INVALID_GROUP_NAME=-15 + int32 INVALID_GOAL_CONSTRAINTS=-16 + int32 INVALID_ROBOT_STATE=-17 + int32 INVALID_LINK_NAME=-18 + int32 INVALID_OBJECT_NAME=-19 + int32 FRAME_TRANSFORM_FAILURE=-21 + int32 COLLISION_CHECKING_UNAVAILABLE=-22 + int32 ROBOT_STATE_STALE=-23 + int32 SENSOR_INFO_STALE=-24 + int32 COMMUNICATION_FAILURE=-25 + int32 CRASH=-29 + int32 ABORT=-30 + int32 NO_IK_SOLUTION=-31 + +# The full starting state of the robot at the start of the trajectory +moveit_msgs/RobotState trajectory_start + sensor_msgs/JointState joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] name + float64[] position + float64[] velocity + float64[] effort + sensor_msgs/MultiDOFJointState multi_dof_joint_state + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] twist + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Wrench[] wrench + Vector3 force + float64 x + float64 y + float64 z + Vector3 torque + float64 x + float64 y + float64 z + AttachedCollisionObject[] attached_collision_objects + string link_name + CollisionObject object + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/Pose pose + Point position + float64 x + 
float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string id + object_recognition_msgs/ObjectType type + string key + string db + shape_msgs/SolidPrimitive[] primitives + uint8 BOX=1 + uint8 SPHERE=2 + uint8 CYLINDER=3 + uint8 CONE=4 + uint8 PRISM=5 + uint8 type + float64[<=3] dimensions # + uint8 BOX_X=0 + uint8 BOX_Y=1 + uint8 BOX_Z=2 + uint8 SPHERE_RADIUS=0 + uint8 CYLINDER_HEIGHT=0 + uint8 CYLINDER_RADIUS=1 + uint8 CONE_HEIGHT=0 + uint8 CONE_RADIUS=1 + uint8 PRISM_HEIGHT=0 + geometry_msgs/Polygon polygon + Point32[] points + # + # + float32 x + float32 y + float32 z + geometry_msgs/Pose[] primitive_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Mesh[] meshes + MeshTriangle[] triangles + uint32[3] vertex_indices + geometry_msgs/Point[] vertices + float64 x + float64 y + float64 z + geometry_msgs/Pose[] mesh_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + shape_msgs/Plane[] planes + # + float64[4] coef + geometry_msgs/Pose[] plane_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + string[] subframe_names + geometry_msgs/Pose[] subframe_poses + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + byte ADD=0 + byte REMOVE=1 + byte APPEND=2 + byte MOVE=3 + byte operation + string[] touch_links + trajectory_msgs/JointTrajectory detach_posture + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + float64 weight + bool is_diff + +# The trajectory that moved group produced for execution +moveit_msgs/RobotTrajectory planned_trajectory + trajectory_msgs/JointTrajectory joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + trajectory_msgs/MultiDOFJointTrajectory multi_dof_joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + MultiDOFJointTrajectoryPoint[] points + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + +# The trace of the trajectory recorded during execution +moveit_msgs/RobotTrajectory executed_trajectory + trajectory_msgs/JointTrajectory joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string 
frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + trajectory_msgs/MultiDOFJointTrajectory multi_dof_joint_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + MultiDOFJointTrajectoryPoint[] points + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + +# The amount of time it took to complete the motion plan +float64 planning_time + +--- + +# The internal state that the move group action currently is in +string state +""", + "control_msgs/action/FollowJointTrajectory": """# The trajectory for all revolute, continuous or prismatic joints +trajectory_msgs/JointTrajectory trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + JointTrajectoryPoint[] points + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec +# The trajectory for all planar or floating joints (i.e. individual joints with more than one DOF) +trajectory_msgs/MultiDOFJointTrajectory multi_dof_trajectory + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + string[] joint_names + MultiDOFJointTrajectoryPoint[] points + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + +# Tolerances for the trajectory. If the measured joint values fall +# outside the tolerances the trajectory goal is aborted. Any +# tolerances that are not specified (by being omitted or set to 0) are +# set to the defaults for the action server (often taken from the +# parameter server). + +# Tolerances applied to the joints as the trajectory is executed. If +# violated, the goal aborts with error_code set to +# PATH_TOLERANCE_VIOLATED. +JointTolerance[] path_tolerance + # + string name + float64 position # + float64 velocity # + float64 acceleration # +JointComponentTolerance[] component_path_tolerance + uint16 X_AXIS=1 + uint16 Y_AXIS=2 + uint16 Z_AXIS=3 + uint16 TRANSLATION=4 + uint16 ROTATION=5 + string joint_name + uint16 component + float64 position + float64 velocity + float64 acceleration + +# To report success, the joints must be within goal_tolerance of the +# final trajectory value. The goal must be achieved by time the +# trajectory ends plus goal_time_tolerance. 
(goal_time_tolerance +# allows some leeway in time, so that the trajectory goal can still +# succeed even if the joints reach the goal some time after the +# precise end time of the trajectory). +# +# If the joints are not within goal_tolerance after "trajectory finish +# time" + goal_time_tolerance, the goal aborts with error_code set to +# GOAL_TOLERANCE_VIOLATED +JointTolerance[] goal_tolerance + # + string name + float64 position # + float64 velocity # + float64 acceleration # +JointComponentTolerance[] component_goal_tolerance + uint16 X_AXIS=1 + uint16 Y_AXIS=2 + uint16 Z_AXIS=3 + uint16 TRANSLATION=4 + uint16 ROTATION=5 + string joint_name + uint16 component + float64 position + float64 velocity + float64 acceleration +builtin_interfaces/Duration goal_time_tolerance + int32 sec + uint32 nanosec + +--- +int32 error_code +int32 SUCCESSFUL = 0 +int32 INVALID_GOAL = -1 +int32 INVALID_JOINTS = -2 +int32 OLD_HEADER_TIMESTAMP = -3 +int32 PATH_TOLERANCE_VIOLATED = -4 +int32 GOAL_TOLERANCE_VIOLATED = -5 + +# Human readable description of the error code. Contains complementary +# information that is especially useful when execution fails, for instance: +# - INVALID_GOAL: The reason for the invalid goal (e.g., the requested +# trajectory is in the past). +# - INVALID_JOINTS: The mismatch between the expected controller joints +# and those provided in the goal. +# - PATH_TOLERANCE_VIOLATED and GOAL_TOLERANCE_VIOLATED: Which joint +# violated which tolerance, and by how much. +string error_string + +--- +std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id +string[] joint_names +trajectory_msgs/JointTrajectoryPoint desired + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec +trajectory_msgs/JointTrajectoryPoint actual + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec +trajectory_msgs/JointTrajectoryPoint error + float64[] positions + float64[] velocities + float64[] accelerations + float64[] effort + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec + +string[] multi_dof_joint_names +trajectory_msgs/MultiDOFJointTrajectoryPoint multi_dof_desired + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec +trajectory_msgs/MultiDOFJointTrajectoryPoint multi_dof_actual + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec 
+trajectory_msgs/MultiDOFJointTrajectoryPoint multi_dof_error + geometry_msgs/Transform[] transforms + Vector3 translation + float64 x + float64 y + float64 z + Quaternion rotation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + geometry_msgs/Twist[] velocities + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + geometry_msgs/Twist[] accelerations + Vector3 linear + float64 x + float64 y + float64 z + Vector3 angular + float64 x + float64 y + float64 z + builtin_interfaces/Duration time_from_start + int32 sec + uint32 nanosec +""", + "/panda_hand_controller/gripper_cmd": """GripperCommand command +float64 position +float64 max_effort +--- +float64 position # The current gripper gap size (in meters) +float64 effort # The current effort exerted (in Newtons) +bool stalled # True iff the gripper is exerting max effort and not moving +bool reached_goal # True iff the gripper position has reached the commanded setpoint +--- +float64 position # The current gripper gap size (in meters) +float64 effort # The current effort exerted (in Newtons) +bool stalled # True iff the gripper is exerting max effort and not moving +bool reached_goal # True iff the gripper position has reached the commanded setpoint +""", +} + + +NAVIGATION_INTERFACES: Dict[str, str] = { + "nav2_msgs/action/NavigateToPose": """#goal definition +geometry_msgs/PoseStamped pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string behavior_tree +--- +#result definition +std_msgs/Empty result +--- +#feedback definition +geometry_msgs/PoseStamped current_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +builtin_interfaces/Duration navigation_time + int32 sec + uint32 nanosec +builtin_interfaces/Duration estimated_time_remaining + int32 sec + uint32 nanosec +int16 number_of_recoveries +float32 distance_remaining +""", + "nav2_msgs/action/AssistedTeleop": """#goal definition +builtin_interfaces/Duration time_allowance + int32 sec + uint32 nanosec +--- +#result definition +builtin_interfaces/Duration total_elapsed_time + int32 sec + uint32 nanosec +--- +#feedback +builtin_interfaces/Duration current_teleop_duration + int32 sec + uint32 nanosec""", + "nav2_msgs/action/BackUp": """#goal definition +geometry_msgs/Point target + float64 x + float64 y + float64 z +float32 speed +builtin_interfaces/Duration time_allowance + int32 sec + uint32 nanosec +--- +#result definition +builtin_interfaces/Duration total_elapsed_time + int32 sec + uint32 nanosec +--- +#feedback definition +float32 distance_traveled""", + "nav2_msgs/action/ComputePathThroughPoses": """#goal definition +geometry_msgs/PoseStamped[] goals + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +geometry_msgs/PoseStamped start + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + 
float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string planner_id +bool use_start # If false, use current robot pose as path start, if true, use start above instead +--- +#result definition +nav_msgs/Path path + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +builtin_interfaces/Duration planning_time + int32 sec + uint32 nanosec +--- +#feedback definition""", + "nav2_msgs/action/ComputePathToPose": """#goal definition +geometry_msgs/PoseStamped goal + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +geometry_msgs/PoseStamped start + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string planner_id +bool use_start # If false, use current robot pose as path start, if true, use start above instead +--- +#result definition +nav_msgs/Path path + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +builtin_interfaces/Duration planning_time + int32 sec + uint32 nanosec +--- +#feedback definition""", + "nav2_msgs/action/DriveOnHeading": """#goal definition +geometry_msgs/Point target + float64 x + float64 y + float64 z +float32 speed +builtin_interfaces/Duration time_allowance + int32 sec + uint32 nanosec +--- +#result definition +builtin_interfaces/Duration total_elapsed_time + int32 sec + uint32 nanosec +--- +#feedback definition +float32 distance_traveled""", + "nav2_msgs/action/FollowPath": """#goal definition +nav_msgs/Path path + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string controller_id +string goal_checker_id +--- +#result definition +std_msgs/Empty result +--- +#feedback definition +float32 distance_to_goal +float32 speed""", + "nav2_msgs/action/FollowWaypoints": """#goal definition +geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +--- +#result definition +int32[] missed_waypoints +--- +#feedback definition +uint32 current_waypoint""", + "nav2_msgs/action/NavigateThroughPoses": """#goal definition +geometry_msgs/PoseStamped[] poses + std_msgs/Header 
header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string behavior_tree +--- +#result definition +std_msgs/Empty result +--- +#feedback definition +geometry_msgs/PoseStamped current_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +builtin_interfaces/Duration navigation_time + int32 sec + uint32 nanosec +builtin_interfaces/Duration estimated_time_remaining + int32 sec + uint32 nanosec +int16 number_of_recoveries +float32 distance_remaining +int16 number_of_poses_remaining +""", + "nav2_msgs/action/SmoothPath": """#goal definition +nav_msgs/Path path + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +string smoother_id +builtin_interfaces/Duration max_smoothing_duration + int32 sec + uint32 nanosec +bool check_for_collisions +--- +#result definition +nav_msgs/Path path + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + geometry_msgs/PoseStamped[] poses + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +builtin_interfaces/Duration smoothing_duration + int32 sec + uint32 nanosec +bool was_completed +--- +#feedback definition +""", + "nav2_msgs/action/Wait": """#goal definition +builtin_interfaces/Duration time + int32 sec + uint32 nanosec +--- +#result definition +builtin_interfaces/Duration total_elapsed_time + int32 sec + uint32 nanosec +--- +#feedback definition +builtin_interfaces/Duration time_left + int32 sec + uint32 nanosec""", +} + +CUSTOM_INTERFACES: Dict[str, str] = { + "rai_interfaces/msg/HRIMessage": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id +string text +sensor_msgs/Image[] images + std_msgs/Header header # + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + uint32 height # + uint32 width # + string encoding # + # taken from the list of strings in include/sensor_msgs/image_encodings.hpp + uint8 is_bigendian # + uint32 step # + uint8[] data # +rai_interfaces/AudioMessage[] audios + # + # + # + # + # + int16[] audio + uint16 sample_rate + uint16 channels +string communication_id +int64 seq_no +bool seq_end +""", + "rai_interfaces/msg/AudioMessage": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +int16[] audio +uint16 sample_rate +uint16 channels +""", + "rai_interfaces/msg/RAIDetectionArray": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# A list of 2D detections, for a multi-object 2D detector. +std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + +# A list of the detected proposals. A multi-proposal detector might generate +# this list with many candidate detections generated from a single input. +vision_msgs/Detection2D[] detections + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + ObjectHypothesisWithPose[] results + ObjectHypothesis hypothesis + string class_id + float64 score + geometry_msgs/PoseWithCovariance pose + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64[36] covariance + BoundingBox2D bbox + vision_msgs/Pose2D center + float64 x + float64 y + float64 theta + float64 size_x + float64 size_y + string id +# a list of classes being detected +string[] detection_classes +""", + "rai_interfaces/srv/ManipulatorMoveTo": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A simplified approach with binary states for the gripper +bool initial_gripper_state +bool final_gripper_state +geometry_msgs/PoseStamped target_pose + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +--- +bool success +""", + "rai_interfaces/srv/RAIGroundedSam": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +RAIDetectionArray detections + # + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + vision_msgs/Detection2D[] detections + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + ObjectHypothesisWithPose[] results + ObjectHypothesis hypothesis + string class_id + float64 score + geometry_msgs/PoseWithCovariance pose + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64[36] covariance + BoundingBox2D bbox + vision_msgs/Pose2D center + float64 x + float64 y + float64 theta + float64 size_x + float64 size_y + string id + string[] detection_classes +sensor_msgs/Image source_img + std_msgs/Header header # + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + uint32 height # + uint32 width # + string encoding # + # taken from the list of strings in include/sensor_msgs/image_encodings.hpp + uint8 is_bigendian # + uint32 step # + uint8[] data # +--- +sensor_msgs/Image[] masks + std_msgs/Header header # + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + uint32 height # + uint32 width # + string encoding # + # taken from the 
list of strings in include/sensor_msgs/image_encodings.hpp + uint8 is_bigendian # + uint32 step # + uint8[] data # +""", + "rai_interfaces/srv/RAIGroundingDino": """ +# +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +string classes +float64 box_threshold +float64 text_threshold +sensor_msgs/Image source_img + std_msgs/Header header # + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + uint32 height # + uint32 width # + string encoding # + # taken from the list of strings in include/sensor_msgs/image_encodings.hpp + uint8 is_bigendian # + uint32 step # + uint8[] data # +--- +RAIDetectionArray detections + # + # + # + # + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + vision_msgs/Detection2D[] detections + # + std_msgs/Header header + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + ObjectHypothesisWithPose[] results + ObjectHypothesis hypothesis + string class_id + float64 score + geometry_msgs/PoseWithCovariance pose + Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 + float64[36] covariance + BoundingBox2D bbox + vision_msgs/Pose2D center + float64 x + float64 y + float64 theta + float64 size_x + float64 size_y + string id + string[] detection_classes +""", + "rai_interfaces/srv/StringList": """ +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Request - empty +--- +# Response +bool success +string[] string_list +""", + "rai_interfaces/srv/VectorStoreRetrieval": """ +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Request +string query + +--- +# Response +bool success +string message +string[] documents +float32[] scores +""", + "rai_interfaces/srv/WhatISee": """ +# Copyright (C) 2024 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Request (empty) + +--- +# Response, timed with image timestamp +string[] observations +string perception_source +sensor_msgs/Image image + std_msgs/Header header # + builtin_interfaces/Time stamp + int32 sec + uint32 nanosec + string frame_id + # Header frame_id should be optical frame of camera + # origin of frame should be optical center of cameara + # +x should point to the right in the image + # +y should point down in the image + # +z should point into to plane of the image + # If the frame_id here and the frame_id of the CameraInfo + # message associated with the image conflict + # the behavior is undefined + uint32 height # + uint32 width # + string encoding # + # taken from the list of strings in include/sensor_msgs/image_encodings.hpp + uint8 is_bigendian # + uint32 step # + uint8[] data # +geometry_msgs/Pose pose + Point position + float64 x + float64 y + float64 z + Quaternion orientation + float64 x 0 + float64 y 0 + float64 z 0 + float64 w 1 +""", + "rai_interfaces/action/Task": """ +# Goal +string task +string description +string priority + +--- +# Result +bool success +string report + +--- +# Feedback +string current_status +""", + "/load_map": """ +string filename +--- +bool success +""", + "/query_planner_interface": """ +--- + +# The planning instances that could be used in the benchmark +PlannerInterfaceDescription[] planner_interfaces + string name + string pipeline_id + string[] planner_ids + +""", +} + +COMMON_TOPICS_AND_TYPES: Dict[str, str] = { + "/clock": "rosgraph_msgs/msg/Clock", + "/parameter_events": "rcl_interfaces/msg/ParameterEvent", + "/rosout": "rcl_interfaces/msg/Log", + "/tf": "tf2_msgs/msg/TFMessage", + "/tf_static": "tf2_msgs/msg/TFMessage", + "/joint_states": "sensor_msgs/msg/JointState", + "/robot_description": "std_msgs/msg/String", + "/robot_description_semantic": "std_msgs/msg/String", + "/bond": "bond/msg/Status", + "/diagnostics": "diagnostic_msgs/msg/DiagnosticArray", + # Perception topics + "/color_camera_info5": "sensor_msgs/msg/CameraInfo", + "/color_image5": "sensor_msgs/msg/Image", + "/depth_camera_info5": "sensor_msgs/msg/CameraInfo", + "/depth_image5": "sensor_msgs/msg/Image", + "/pointcloud": "sensor_msgs/msg/PointCloud2", + "/scan": "sensor_msgs/msg/LaserScan", + "/odom": "nav_msgs/msg/Odometry", + "/odometry/filtered": "nav_msgs/msg/Odometry", +} + +MANIPULATION_TOPICS_AND_TYPES: Dict[str, str] = { + # MoveIt2 planning and execution + "/move_action/_action/feedback": "moveit_msgs/action/MoveGroup_FeedbackMessage", + "/move_action/_action/status": "action_msgs/msg/GoalStatusArray", + "/execute_trajectory/_action/feedback": "moveit_msgs/action/ExecuteTrajectory_FeedbackMessage", +
"/execute_trajectory/_action/status": "action_msgs/msg/GoalStatusArray", + "/motion_plan_request": "moveit_msgs/msg/MotionPlanRequest", + "/display_planned_path": "moveit_msgs/msg/DisplayTrajectory", + "/trajectory_execution_event": "std_msgs/msg/String", + # Planning scene management + "/planning_scene": "moveit_msgs/msg/PlanningScene", + "/planning_scene_world": "moveit_msgs/msg/PlanningSceneWorld", + "/monitored_planning_scene": "moveit_msgs/msg/PlanningScene", + "/collision_object": "moveit_msgs/msg/CollisionObject", + "/attached_collision_object": "moveit_msgs/msg/AttachedCollisionObject", + "/display_contacts": "visualization_msgs/msg/MarkerArray", + # Arm and gripper controllers + "/panda_arm_controller/follow_joint_trajectory/_action/feedback": "control_msgs/action/FollowJointTrajectory_FeedbackMessage", + "/panda_arm_controller/follow_joint_trajectory/_action/status": "action_msgs/msg/GoalStatusArray", + "/panda_hand_controller/gripper_cmd/_action/feedback": "control_msgs/action/GripperCommand_FeedbackMessage", + "/panda_hand_controller/gripper_cmd/_action/status": "action_msgs/msg/GoalStatusArray", +} + + +NAVIGATION_TOPICS_AND_TYPES: Dict[str, str] = { + # Main navigation actions + "/navigate_to_pose/_action/feedback": "nav2_msgs/action/NavigateToPose_FeedbackMessage", + "/navigate_to_pose/_action/status": "action_msgs/msg/GoalStatusArray", + "/navigate_through_poses/_action/feedback": "nav2_msgs/action/NavigateThroughPoses_FeedbackMessage", + "/navigate_through_poses/_action/status": "action_msgs/msg/GoalStatusArray", + "/follow_path/_action/feedback": "nav2_msgs/action/FollowPath_FeedbackMessage", + "/follow_path/_action/status": "action_msgs/msg/GoalStatusArray", + "/follow_waypoints/_action/feedback": "nav2_msgs/action/FollowWaypoints_FeedbackMessage", + "/follow_waypoints/_action/status": "action_msgs/msg/GoalStatusArray", + # Path planning actions + "/compute_path_to_pose/_action/feedback": "nav2_msgs/action/ComputePathToPose_FeedbackMessage", + "/compute_path_to_pose/_action/status": "action_msgs/msg/GoalStatusArray", + "/compute_path_through_poses/_action/feedback": "nav2_msgs/action/ComputePathThroughPoses_FeedbackMessage", + "/compute_path_through_poses/_action/status": "action_msgs/msg/GoalStatusArray", + "/smooth_path/_action/feedback": "nav2_msgs/action/SmoothPath_FeedbackMessage", + "/smooth_path/_action/status": "action_msgs/msg/GoalStatusArray", + # Behavior actions + "/assisted_teleop/_action/feedback": "nav2_msgs/action/AssistedTeleop_FeedbackMessage", + "/assisted_teleop/_action/status": "action_msgs/msg/GoalStatusArray", + "/backup/_action/feedback": "nav2_msgs/action/BackUp_FeedbackMessage", + "/backup/_action/status": "action_msgs/msg/GoalStatusArray", + "/drive_on_heading/_action/feedback": "nav2_msgs/action/DriveOnHeading_FeedbackMessage", + "/drive_on_heading/_action/status": "action_msgs/msg/GoalStatusArray", + "/spin/_action/feedback": "nav2_msgs/action/Spin_FeedbackMessage", + "/spin/_action/status": "action_msgs/msg/GoalStatusArray", + "/wait/_action/feedback": "nav2_msgs/action/Wait_FeedbackMessage", + "/wait/_action/status": "action_msgs/msg/GoalStatusArray", + # Costmaps and mapping + "/global_costmap/costmap": "nav_msgs/msg/OccupancyGrid", + "/global_costmap/costmap_raw": "nav2_msgs/msg/Costmap", + "/global_costmap/costmap_updates": "map_msgs/msg/OccupancyGridUpdate", + "/global_costmap/footprint": "geometry_msgs/msg/Polygon", + "/global_costmap/published_footprint": "geometry_msgs/msg/PolygonStamped", + "/global_costmap/scan": 
"sensor_msgs/msg/LaserScan", + "/local_costmap/costmap": "nav_msgs/msg/OccupancyGrid", + "/local_costmap/costmap_raw": "nav2_msgs/msg/Costmap", + "/local_costmap/costmap_updates": "map_msgs/msg/OccupancyGridUpdate", + "/local_costmap/footprint": "geometry_msgs/msg/Polygon", + "/local_costmap/published_footprint": "geometry_msgs/msg/PolygonStamped", + "/local_costmap/scan": "sensor_msgs/msg/LaserScan", + "/map": "nav_msgs/msg/OccupancyGrid", + "/map_metadata": "nav_msgs/msg/MapMetaData", + # SLAM + "/slam_toolbox/feedback": "visualization_msgs/msg/InteractiveMarkerFeedback", + "/slam_toolbox/graph_visualization": "visualization_msgs/msg/MarkerArray", + "/slam_toolbox/scan_visualization": "sensor_msgs/msg/LaserScan", + "/slam_toolbox/update": "visualization_msgs/msg/InteractiveMarkerUpdate", + # Path planning and visualization + "/plan": "nav_msgs/msg/Path", + "/plan_smoothed": "nav_msgs/msg/Path", + "/unsmoothed_plan": "nav_msgs/msg/Path", + "/transformed_global_plan": "nav_msgs/msg/Path", + "/trajectories": "visualization_msgs/msg/MarkerArray", + # Control and goals + "/cmd_vel_nav": "geometry_msgs/msg/Twist", + "/cmd_vel_teleop": "geometry_msgs/msg/Twist", + "/goal_pose": "geometry_msgs/msg/PoseStamped", + "/pose": "geometry_msgs/msg/PoseWithCovarianceStamped", + "/preempt_teleop": "std_msgs/msg/Empty", + "/speed_limit": "nav2_msgs/msg/SpeedLimit", + # Behavior tree + "/behavior_tree_log": "nav2_msgs/msg/BehaviorTreeLog", + # Other + "/led_strip": "sensor_msgs/msg/Image", + # Lifecycle management + "/behavior_server/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/bt_navigator/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/controller_server/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/global_costmap/global_costmap/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/local_costmap/local_costmap/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/map_saver/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/planner_server/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/smoother_server/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/velocity_smoother/transition_event": "lifecycle_msgs/msg/TransitionEvent", + "/waypoint_follower/transition_event": "lifecycle_msgs/msg/TransitionEvent", +} + +CUSTOM_TOPICS_AND_TYPES: Dict[str, str] = { + "/hri_message": "rai_interfaces/msg/HRIMessage", + "/audio_message": "rai_interfaces/msg/AudioMessage", + "/detection_array": "rai_interfaces/msg/RAIDetectionArray", +} + + +COMMON_SERVICES_AND_TYPES: Dict[str, str] = { + # Core infrastructure + "/tf2_frames": "tf2_msgs/srv/FrameGraph", + # Container management + "/nav2_container/_container/list_nodes": "composition_interfaces/srv/ListNodes", + "/nav2_container/_container/load_node": "composition_interfaces/srv/LoadNode", + "/nav2_container/_container/unload_node": "composition_interfaces/srv/UnloadNode", + # Robot state and transforms + "/robot_state_publisher/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/robot_state_publisher/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/robot_state_publisher/get_parameters": "rcl_interfaces/srv/GetParameters", + "/robot_state_publisher/list_parameters": "rcl_interfaces/srv/ListParameters", + "/robot_state_publisher/set_parameters": "rcl_interfaces/srv/SetParameters", + "/robot_state_publisher/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/static_transform_publisher/describe_parameters": 
"rcl_interfaces/srv/DescribeParameters", + "/static_transform_publisher/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/static_transform_publisher/get_parameters": "rcl_interfaces/srv/GetParameters", + "/static_transform_publisher/list_parameters": "rcl_interfaces/srv/ListParameters", + "/static_transform_publisher/set_parameters": "rcl_interfaces/srv/SetParameters", + "/static_transform_publisher/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + # Simulation/Gazebo services + "/delete_entity": "gazebo_msgs/srv/DeleteEntity", + "/get_available_spawnable_names": "gazebo_msgs/srv/GetWorldProperties", + "/get_spawn_point_info": "gazebo_msgs/srv/GetModelState", + "/get_spawn_points_names": "gazebo_msgs/srv/GetWorldProperties", + "/spawn_entity": "gazebo_msgs/srv/SpawnEntity", + # Parameter services (common pattern for all nodes) + "/launch_ros_138640/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/launch_ros_138640/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/launch_ros_138640/get_parameters": "rcl_interfaces/srv/GetParameters", + "/launch_ros_138640/list_parameters": "rcl_interfaces/srv/ListParameters", + "/launch_ros_138640/set_parameters": "rcl_interfaces/srv/SetParameters", + "/launch_ros_138640/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/launch_ros_2375507/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/launch_ros_2375507/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/launch_ros_2375507/get_parameters": "rcl_interfaces/srv/GetParameters", + "/launch_ros_2375507/list_parameters": "rcl_interfaces/srv/ListParameters", + "/launch_ros_2375507/set_parameters": "rcl_interfaces/srv/SetParameters", + "/launch_ros_2375507/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/o3de_ros2_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/o3de_ros2_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/o3de_ros2_node/get_parameters": "rcl_interfaces/srv/GetParameters", + "/o3de_ros2_node/list_parameters": "rcl_interfaces/srv/ListParameters", + "/o3de_ros2_node/set_parameters": "rcl_interfaces/srv/SetParameters", + "/o3de_ros2_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + # AI/ML services (custom interfaces for perception and documentation) + "/grounded_sam/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/grounded_sam/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/grounded_sam/get_parameters": "rcl_interfaces/srv/GetParameters", + "/grounded_sam/list_parameters": "rcl_interfaces/srv/ListParameters", + "/grounded_sam/set_parameters": "rcl_interfaces/srv/SetParameters", + "/grounded_sam/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/grounding_dino/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/grounding_dino/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/grounding_dino/get_parameters": "rcl_interfaces/srv/GetParameters", + "/grounding_dino/list_parameters": "rcl_interfaces/srv/ListParameters", + "/grounding_dino/set_parameters": "rcl_interfaces/srv/SetParameters", + "/grounding_dino/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/rai_ros2_ari_connector_b6ed00ab6356/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/rai_ros2_ari_connector_b6ed00ab6356/get_parameter_types": 
"rcl_interfaces/srv/GetParameterTypes", + "/rai_ros2_ari_connector_b6ed00ab6356/get_parameters": "rcl_interfaces/srv/GetParameters", + "/rai_ros2_ari_connector_b6ed00ab6356/list_parameters": "rcl_interfaces/srv/ListParameters", + "/rai_ros2_ari_connector_b6ed00ab6356/set_parameters": "rcl_interfaces/srv/SetParameters", + "/rai_ros2_ari_connector_b6ed00ab6356/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", +} + +MANIPULATION_SERVICES_AND_TYPES: Dict[str, str] = { + # MoveIt2 planning services + "/apply_planning_scene": "moveit_msgs/srv/ApplyPlanningScene", + "/check_state_validity": "moveit_msgs/srv/GetStateValidity", + "/clear_octomap": "std_srvs/srv/Empty", + "/compute_cartesian_path": "moveit_msgs/srv/GetCartesianPath", + "/compute_fk": "moveit_msgs/srv/GetPositionFK", + "/compute_ik": "moveit_msgs/srv/GetPositionIK", + "/get_planner_params": "moveit_msgs/srv/GetPlannerParams", + "/get_planning_scene": "moveit_msgs/srv/GetPlanningScene", + "/load_map": "moveit_msgs/srv/LoadMap", + "/plan_kinematic_path": "moveit_msgs/srv/GetMotionPlan", + "/query_planner_interface": "moveit_msgs/srv/QueryPlannerInterfaces", + "/save_map": "moveit_msgs/srv/SaveMap", + "/set_planner_params": "moveit_msgs/srv/SetPlannerParams", + # Custom manipulation interfaces + "/reset_manipulator": "std_srvs/srv/Trigger", + # MoveIt2 component parameter services + "/move_group/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/move_group/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/move_group/get_parameters": "rcl_interfaces/srv/GetParameters", + "/move_group/list_parameters": "rcl_interfaces/srv/ListParameters", + "/move_group/set_parameters": "rcl_interfaces/srv/SetParameters", + "/move_group/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/move_group_private_96220314512624/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/move_group_private_96220314512624/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/move_group_private_96220314512624/get_parameters": "rcl_interfaces/srv/GetParameters", + "/move_group_private_96220314512624/list_parameters": "rcl_interfaces/srv/ListParameters", + "/move_group_private_96220314512624/set_parameters": "rcl_interfaces/srv/SetParameters", + "/move_group_private_96220314512624/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/moveit_simple_controller_manager/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/moveit_simple_controller_manager/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/moveit_simple_controller_manager/get_parameters": "rcl_interfaces/srv/GetParameters", + "/moveit_simple_controller_manager/list_parameters": "rcl_interfaces/srv/ListParameters", + "/moveit_simple_controller_manager/set_parameters": "rcl_interfaces/srv/SetParameters", + "/moveit_simple_controller_manager/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + # Arm controller services + "/arm_controller/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/arm_controller/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/arm_controller/get_parameters": "rcl_interfaces/srv/GetParameters", + "/arm_controller/list_parameters": "rcl_interfaces/srv/ListParameters", + "/arm_controller/set_parameters": "rcl_interfaces/srv/SetParameters", + "/arm_controller/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/state_controller/describe_parameters": 
"rcl_interfaces/srv/DescribeParameters", + "/state_controller/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/state_controller/get_parameters": "rcl_interfaces/srv/GetParameters", + "/state_controller/list_parameters": "rcl_interfaces/srv/ListParameters", + "/state_controller/set_parameters": "rcl_interfaces/srv/SetParameters", + "/state_controller/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", +} + +NAVIGATION_SERVICES_AND_TYPES: Dict[str, str] = { + # Action services for navigation behaviors + "/assisted_teleop/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/assisted_teleop/_action/get_result": "nav2_msgs/action/AssistedTeleop_GetResult", + "/assisted_teleop/_action/send_goal": "nav2_msgs/action/AssistedTeleop_SendGoal", + "/backup/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/backup/_action/get_result": "nav2_msgs/action/BackUp_GetResult", + "/backup/_action/send_goal": "nav2_msgs/action/BackUp_SendGoal", + "/drive_on_heading/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/drive_on_heading/_action/get_result": "nav2_msgs/action/DriveOnHeading_GetResult", + "/drive_on_heading/_action/send_goal": "nav2_msgs/action/DriveOnHeading_SendGoal", + "/follow_path/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/follow_path/_action/get_result": "nav2_msgs/action/FollowPath_GetResult", + "/follow_path/_action/send_goal": "nav2_msgs/action/FollowPath_SendGoal", + "/follow_waypoints/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/follow_waypoints/_action/get_result": "nav2_msgs/action/FollowWaypoints_GetResult", + "/follow_waypoints/_action/send_goal": "nav2_msgs/action/FollowWaypoints_SendGoal", + "/spin/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/spin/_action/get_result": "nav2_msgs/action/Spin_GetResult", + "/spin/_action/send_goal": "nav2_msgs/action/Spin_SendGoal", + "/wait/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/wait/_action/get_result": "nav2_msgs/action/Wait_GetResult", + "/wait/_action/send_goal": "nav2_msgs/action/Wait_SendGoal", + # Path planning action services + "/compute_path_through_poses/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/compute_path_through_poses/_action/get_result": "nav2_msgs/action/ComputePathThroughPoses_GetResult", + "/compute_path_through_poses/_action/send_goal": "nav2_msgs/action/ComputePathThroughPoses_SendGoal", + "/compute_path_to_pose/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/compute_path_to_pose/_action/get_result": "nav2_msgs/action/ComputePathToPose_GetResult", + "/compute_path_to_pose/_action/send_goal": "nav2_msgs/action/ComputePathToPose_SendGoal", + "/smooth_path/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/smooth_path/_action/get_result": "nav2_msgs/action/SmoothPath_GetResult", + "/smooth_path/_action/send_goal": "nav2_msgs/action/SmoothPath_SendGoal", + # Main navigation action services + "/navigate_through_poses/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/navigate_through_poses/_action/get_result": "nav2_msgs/action/NavigateThroughPoses_GetResult", + "/navigate_through_poses/_action/send_goal": "nav2_msgs/action/NavigateThroughPoses_SendGoal", + "/navigate_to_pose/_action/cancel_goal": "action_msgs/srv/CancelGoal", + "/navigate_to_pose/_action/get_result": "nav2_msgs/action/NavigateToPose_GetResult", + "/navigate_to_pose/_action/send_goal": "nav2_msgs/action/NavigateToPose_SendGoal", + # Costmap management services + "/global_costmap/clear_around_global_costmap": 
"nav2_msgs/srv/ClearCostmapAroundRobot", + "/global_costmap/clear_entirely_global_costmap": "nav2_msgs/srv/ClearEntireCostmap", + "/global_costmap/clear_except_global_costmap": "nav2_msgs/srv/ClearCostmapExceptRegion", + "/global_costmap/get_costmap": "nav2_msgs/srv/GetCostmap", + "/local_costmap/clear_around_local_costmap": "nav2_msgs/srv/ClearCostmapAroundRobot", + "/local_costmap/clear_entirely_local_costmap": "nav2_msgs/srv/ClearEntireCostmap", + "/local_costmap/clear_except_local_costmap": "nav2_msgs/srv/ClearCostmapExceptRegion", + "/local_costmap/get_costmap": "nav2_msgs/srv/GetCostmap", + # Path validation + "/is_path_valid": "nav2_msgs/srv/IsPathValid", + # SLAM services + "/slam_toolbox/clear_changes": "slam_toolbox/srv/Clear", + "/slam_toolbox/clear_queue": "slam_toolbox/srv/ClearQueue", + "/slam_toolbox/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/slam_toolbox/deserialize_map": "slam_toolbox/srv/DeserializePoseGraph", + "/slam_toolbox/dynamic_map": "nav_msgs/srv/GetMap", + "/slam_toolbox/get_interactive_markers": "visualization_msgs/srv/GetInteractiveMarkers", + "/slam_toolbox/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/slam_toolbox/get_parameters": "rcl_interfaces/srv/GetParameters", + "/slam_toolbox/list_parameters": "rcl_interfaces/srv/ListParameters", + "/slam_toolbox/manual_loop_closure": "slam_toolbox/srv/LoopClosure", + "/slam_toolbox/pause_new_measurements": "slam_toolbox/srv/Pause", + "/slam_toolbox/save_map": "slam_toolbox/srv/SaveMap", + "/slam_toolbox/serialize_map": "slam_toolbox/srv/SerializePoseGraph", + "/slam_toolbox/set_parameters": "rcl_interfaces/srv/SetParameters", + "/slam_toolbox/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/slam_toolbox/toggle_interactive_mode": "slam_toolbox/srv/ToggleInteractive", + # Map saving + "/map_saver/change_state": "lifecycle_msgs/srv/ChangeState", + "/map_saver/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/map_saver/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/map_saver/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/map_saver/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/map_saver/get_parameters": "rcl_interfaces/srv/GetParameters", + "/map_saver/get_state": "lifecycle_msgs/srv/GetState", + "/map_saver/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/map_saver/list_parameters": "rcl_interfaces/srv/ListParameters", + "/map_saver/save_map": "nav2_msgs/srv/SaveMap", + "/map_saver/set_parameters": "rcl_interfaces/srv/SetParameters", + "/map_saver/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + # Navigation server lifecycle and parameter services + "/behavior_server/change_state": "lifecycle_msgs/srv/ChangeState", + "/behavior_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/behavior_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/behavior_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/behavior_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/behavior_server/get_parameters": "rcl_interfaces/srv/GetParameters", + "/behavior_server/get_state": "lifecycle_msgs/srv/GetState", + "/behavior_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/behavior_server/list_parameters": "rcl_interfaces/srv/ListParameters", + "/behavior_server/set_parameters": 
"rcl_interfaces/srv/SetParameters", + "/behavior_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/bt_navigator/change_state": "lifecycle_msgs/srv/ChangeState", + "/bt_navigator/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/bt_navigator/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/bt_navigator/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/bt_navigator/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/bt_navigator/get_parameters": "rcl_interfaces/srv/GetParameters", + "/bt_navigator/get_state": "lifecycle_msgs/srv/GetState", + "/bt_navigator/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/bt_navigator/list_parameters": "rcl_interfaces/srv/ListParameters", + "/bt_navigator/set_parameters": "rcl_interfaces/srv/SetParameters", + "/bt_navigator/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/bt_navigator_navigate_through_poses_rclcpp_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/bt_navigator_navigate_through_poses_rclcpp_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/bt_navigator_navigate_through_poses_rclcpp_node/get_parameters": "rcl_interfaces/srv/GetParameters", + "/bt_navigator_navigate_through_poses_rclcpp_node/list_parameters": "rcl_interfaces/srv/ListParameters", + "/bt_navigator_navigate_through_poses_rclcpp_node/set_parameters": "rcl_interfaces/srv/SetParameters", + "/bt_navigator_navigate_through_poses_rclcpp_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/bt_navigator_navigate_to_pose_rclcpp_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/bt_navigator_navigate_to_pose_rclcpp_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/bt_navigator_navigate_to_pose_rclcpp_node/get_parameters": "rcl_interfaces/srv/GetParameters", + "/bt_navigator_navigate_to_pose_rclcpp_node/list_parameters": "rcl_interfaces/srv/ListParameters", + "/bt_navigator_navigate_to_pose_rclcpp_node/set_parameters": "rcl_interfaces/srv/SetParameters", + "/bt_navigator_navigate_to_pose_rclcpp_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/controller_server/change_state": "lifecycle_msgs/srv/ChangeState", + "/controller_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/controller_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/controller_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/controller_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/controller_server/get_parameters": "rcl_interfaces/srv/GetParameters", + "/controller_server/get_state": "lifecycle_msgs/srv/GetState", + "/controller_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/controller_server/list_parameters": "rcl_interfaces/srv/ListParameters", + "/controller_server/set_parameters": "rcl_interfaces/srv/SetParameters", + "/controller_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/global_costmap/global_costmap/change_state": "lifecycle_msgs/srv/ChangeState", + "/global_costmap/global_costmap/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/global_costmap/global_costmap/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/global_costmap/global_costmap/get_available_transitions": 
"lifecycle_msgs/srv/GetAvailableTransitions", + "/global_costmap/global_costmap/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/global_costmap/global_costmap/get_parameters": "rcl_interfaces/srv/GetParameters", + "/global_costmap/global_costmap/get_state": "lifecycle_msgs/srv/GetState", + "/global_costmap/global_costmap/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/global_costmap/global_costmap/list_parameters": "rcl_interfaces/srv/ListParameters", + "/global_costmap/global_costmap/set_parameters": "rcl_interfaces/srv/SetParameters", + "/global_costmap/global_costmap/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/local_costmap/local_costmap/change_state": "lifecycle_msgs/srv/ChangeState", + "/local_costmap/local_costmap/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/local_costmap/local_costmap/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/local_costmap/local_costmap/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/local_costmap/local_costmap/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/local_costmap/local_costmap/get_parameters": "rcl_interfaces/srv/GetParameters", + "/local_costmap/local_costmap/get_state": "lifecycle_msgs/srv/GetState", + "/local_costmap/local_costmap/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/local_costmap/local_costmap/list_parameters": "rcl_interfaces/srv/ListParameters", + "/local_costmap/local_costmap/set_parameters": "rcl_interfaces/srv/SetParameters", + "/local_costmap/local_costmap/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/planner_server/change_state": "lifecycle_msgs/srv/ChangeState", + "/planner_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/planner_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/planner_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/planner_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/planner_server/get_parameters": "rcl_interfaces/srv/GetParameters", + "/planner_server/get_state": "lifecycle_msgs/srv/GetState", + "/planner_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/planner_server/list_parameters": "rcl_interfaces/srv/ListParameters", + "/planner_server/set_parameters": "rcl_interfaces/srv/SetParameters", + "/planner_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/smoother_server/change_state": "lifecycle_msgs/srv/ChangeState", + "/smoother_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/smoother_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/smoother_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/smoother_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/smoother_server/get_parameters": "rcl_interfaces/srv/GetParameters", + "/smoother_server/get_state": "lifecycle_msgs/srv/GetState", + "/smoother_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/smoother_server/list_parameters": "rcl_interfaces/srv/ListParameters", + "/smoother_server/set_parameters": "rcl_interfaces/srv/SetParameters", + "/smoother_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/velocity_smoother/change_state": "lifecycle_msgs/srv/ChangeState", + 
"/velocity_smoother/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/velocity_smoother/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/velocity_smoother/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/velocity_smoother/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/velocity_smoother/get_parameters": "rcl_interfaces/srv/GetParameters", + "/velocity_smoother/get_state": "lifecycle_msgs/srv/GetState", + "/velocity_smoother/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/velocity_smoother/list_parameters": "rcl_interfaces/srv/ListParameters", + "/velocity_smoother/set_parameters": "rcl_interfaces/srv/SetParameters", + "/velocity_smoother/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/waypoint_follower/change_state": "lifecycle_msgs/srv/ChangeState", + "/waypoint_follower/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/waypoint_follower/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", + "/waypoint_follower/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", + "/waypoint_follower/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/waypoint_follower/get_parameters": "rcl_interfaces/srv/GetParameters", + "/waypoint_follower/get_state": "lifecycle_msgs/srv/GetState", + "/waypoint_follower/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", + "/waypoint_follower/list_parameters": "rcl_interfaces/srv/ListParameters", + "/waypoint_follower/set_parameters": "rcl_interfaces/srv/SetParameters", + "/waypoint_follower/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + # Lifecycle management services + "/lifecycle_manager_navigation/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/lifecycle_manager_navigation/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/lifecycle_manager_navigation/get_parameters": "rcl_interfaces/srv/GetParameters", + "/lifecycle_manager_navigation/is_active": "std_srvs/srv/Trigger", + "/lifecycle_manager_navigation/list_parameters": "rcl_interfaces/srv/ListParameters", + "/lifecycle_manager_navigation/manage_nodes": "nav2_msgs/srv/ManageLifecycleNodes", + "/lifecycle_manager_navigation/set_parameters": "rcl_interfaces/srv/SetParameters", + "/lifecycle_manager_navigation/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", + "/lifecycle_manager_slam/describe_parameters": "rcl_interfaces/srv/DescribeParameters", + "/lifecycle_manager_slam/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", + "/lifecycle_manager_slam/get_parameters": "rcl_interfaces/srv/GetParameters", + "/lifecycle_manager_slam/is_active": "std_srvs/srv/Trigger", + "/lifecycle_manager_slam/list_parameters": "rcl_interfaces/srv/ListParameters", + "/lifecycle_manager_slam/manage_nodes": "nav2_msgs/srv/ManageLifecycleNodes", + "/lifecycle_manager_slam/set_parameters": "rcl_interfaces/srv/SetParameters", + "/lifecycle_manager_slam/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", +} +CUSTOM_SERVICES_AND_TYPES: Dict[str, str] = { + "/grounded_sam_segment": "rai_interfaces/srv/RAIGroundedSam", + "/grounding_dino_classify": "rai_interfaces/srv/RAIGroundingDino", + "/manipulator_move_to": "rai_interfaces/srv/ManipulatorMoveTo", + "/get_log_digest": "rai_interfaces/srv/StringList", + "/rai_whoami_documentation_service": "rai_interfaces/srv/VectorStoreRetrieval", + "/rai/whatisee/get": 
"rai_interfaces/srv/WhatISee", +} + +MANIPULATION_ACTIONS_AND_TYPES: Dict[str, str] = { + "/move_action": "moveit_msgs/action/MoveGroup", + "/execute_trajectory": "moveit_msgs/action/ExecuteTrajectory", + "/panda_arm_controller/follow_joint_trajectory": "control_msgs/action/FollowJointTrajectory", + "/arm_controller/follow_joint_trajectory": "control_msgs/action/FollowJointTrajectory", + "/panda_hand_controller/gripper_cmd": "control_msgs/action/GripperCommand", + "/gripper_controller/gripper_cmd": "control_msgs/action/GripperCommand", + "/pickup": "moveit_msgs/action/Pickup", + "/place": "moveit_msgs/action/Place", +} +NAVIGATION_ACTIONS_AND_TYPES: Dict[str, str] = { + "/navigate_to_pose": "nav2_msgs/action/NavigateToPose", + "/navigate_through_poses": "nav2_msgs/action/Nmoveit_msgs/action/MoveGroupmoveit_msgs/action/MoveGroupavigateThroughPoses", + "/follow_path": "nav2_msgs/action/FollowPath", + "/follow_waypoints": "nav2_msgs/action/FollowWaypoints", + "/compute_path_to_pose": "nav2_msgs/action/ComputePathToPose", + "/compute_path_through_poses": "nav2_msgs/action/ComputePathThroughPoses", + "/smooth_path": "nav2_msgs/action/SmoothPath", + "/spin": "nav2_msgs/action/Spin", + "/backup": "nav2_msgs/action/BackUp", + "/drive_on_heading": "nav2_msgs/action/DriveOnHeading", + "/wait": "nav2_msgs/action/Wait", + "/assisted_teleop": "nav2_msgs/action/AssistedTeleop", + "/clear_costmap": "nav2_msgs/action/ClearEntireCostmap", +} +COMMON_TOPIC_MODELS: Dict[str, Type[BaseModel]] = { + "sensor_msgs/msg/CameraInfo": CameraInfo, + "sensor_msgs/msg/Image": Image, + "rosgraph_msgs/msg/Clock": Clock, +} + +CUSTOM_TOPIC_MODELS: Dict[str, Type[BaseModel]] = { + "rai_interfaces/msg/HRIMessage": HRIMessage, + "rai_interfaces/msg/AudioMessage": AudioMessage, + "rai_interfaces/msg/RAIDetectionArray": RAIDetectionArray, +} + +CUSTOM_SERVICE_MODELS: Dict[str, Type[BaseModel]] = { + "rai_interfaces/srv/ManipulatorMoveTo": ManipulatorMoveToRequest, + "rai_interfaces/srv/RAIGroundedSam": RAIGroundedSamRequest, + "rai_interfaces/srv/RAIGroundingDino": RAIGroundingDinoRequest, + "rai_interfaces/srv/StringList": StringListRequest, + "rai_interfaces/srv/VectorStoreRetrieval": VectorStoreRetrievalRequest, + "rai_interfaces/srv/WhatISee": WhatISeeRequest, +} +MANIPULATION_ACTION_MODELS: Dict[str, Type[BaseModel]] = {} +NAVIGATION_ACTION_MODELS: Dict[str, Type[BaseModel]] = { + "nav2_msgs/action/NavigateToPose": NavigateToPoseGoal, + "nav2_msgs/action/Spin": SpinGoal, + "nav2_msgs/action/AssistedTeleop": AssistedTeleopGoal, + "nav2_msgs/action/BackUp": BackUpGoal, + "nav2_msgs/action/ComputePathThroughPoses": ComputePathThroughPosesGoal, + "nav2_msgs/action/ComputePathToPose": ComputePathToPoseGoal, + "nav2_msgs/action/DriveOnHeading": DriveOnHeadingGoal, + "nav2_msgs/action/FollowPath": FollowPathGoal, + "nav2_msgs/action/FollowWaypoints": FollowWaypointsGoal, + "nav2_msgs/action/NavigateThroughPoses": NavigateThroughPosesGoal, + "nav2_msgs/action/SmoothPath": SmoothPathGoal, + "nav2_msgs/action/Wait": WaitGoal, +} diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/__init__.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/__init__.py new file mode 100644 index 000000000..953dea515 --- /dev/null +++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/__init__.py @@ -0,0 +1,27 @@ +# Copyright (C) 2025 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .basic_tasks import get_basic_tasks
+from .custom_interfaces_tasks import get_custom_interfaces_tasks
+from .manipulation_tasks import get_manipulation_tasks
+from .navigation_tasks import get_navigation_tasks
+from .spatial_reasoning_tasks import get_spatial_tasks
+
+__all__ = [
+    "get_basic_tasks",
+    "get_custom_interfaces_tasks",
+    "get_manipulation_tasks",
+    "get_navigation_tasks",
+    "get_spatial_tasks",
+]
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/basic_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/basic_tasks.py
new file mode 100644
index 000000000..2a2ae7e88
--- /dev/null
+++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/basic_tasks.py
@@ -0,0 +1,306 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Literal
+
+from rai_bench.tool_calling_agent.interfaces import (
+    Task,
+    TaskArgs,
+)
+from rai_bench.tool_calling_agent.subtasks import (
+    CheckArgsToolCallSubTask,
+)
+from rai_bench.tool_calling_agent.tasks.basic import (
+    AssessSensorDataQualityTask,
+    CheckRobotHealthTask,
+    GetAllROS2CamerasTask,
+    GetPointcloudTask,
+    GetRobotDescriptionTask,
+    GetROS2DepthCameraTask,
+    GetROS2RGBCameraTask,
+    GetROS2TopicsTask,
+)
+from rai_bench.tool_calling_agent.validators import (
+    NotOrderedCallsValidator,
+    OrderedCallsValidator,
+)
+
+########## SUBTASKS #################################################################
+get_topics_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="get_ros2_topics_names_and_types", expected_args={}
+)
+
+color_image5_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="get_ros2_image",
+    expected_args={"topic": "/color_image5"},
+    expected_optional_args={"timeout_sec": int},
+)
+depth_image5_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="get_ros2_image",
+    expected_args={"topic": "/depth_image5"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+# Camera info topics carry sensor_msgs/msg/CameraInfo messages, so they are
+# received as plain messages rather than fetched with the image tool.
+color_camera_info5_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/color_camera_info5"},
+    expected_optional_args={"timeout_sec": int},
+)
+depth_camera_info5_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/depth_camera_info5"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+receive_robot_desc_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/robot_description"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+receive_pointcloud_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/pointcloud"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+# System health subtasks
+diagnostics_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/diagnostics"},
+    expected_optional_args={"timeout_sec": int},
+)
+rosout_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/rosout"},
+    expected_optional_args={"timeout_sec": int},
+)
+joint_states_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/joint_states"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+# Odometry subtasks
+odom_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/odom"},
+    expected_optional_args={"timeout_sec": int},
+)
+filtered_odom_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/odometry/filtered"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+# Transform subtasks
+tf_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/tf"},
+    expected_optional_args={"timeout_sec": int},
+)
+tf_static_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/tf_static"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+
+# Robot description subtasks
+robot_description_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/robot_description"},
+    expected_optional_args={"timeout_sec": int},
+)
+robot_description_semantic_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/robot_description_semantic"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+# Sensor data subtasks
+scan_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/scan"},
+    expected_optional_args={"timeout_sec": int},
+)
+pointcloud_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="receive_ros2_message",
+    expected_args={"topic": "/pointcloud"},
+    expected_optional_args={"timeout_sec": int},
+)
+
+######### VALIDATORS #########################################################################################
+topics_ord_val = OrderedCallsValidator(subtasks=[get_topics_subtask])
+
+color_image_ord_val = OrderedCallsValidator(subtasks=[color_image5_subtask])
+depth_image_ord_val = OrderedCallsValidator(subtasks=[depth_image5_subtask])
+
+color_camera_info_ord_val = OrderedCallsValidator(subtasks=[color_camera_info5_subtask])
+depth_camera_info_ord_val = OrderedCallsValidator(subtasks=[depth_camera_info5_subtask])
+
+color_image_with_info_ord_val = NotOrderedCallsValidator(
+    subtasks=[color_image5_subtask, color_camera_info5_subtask]
+)
+depth_image_with_info_ord_val = NotOrderedCallsValidator(
+    subtasks=[depth_image5_subtask, depth_camera_info5_subtask]
+)
+
+all_camera_images_notord_val = NotOrderedCallsValidator(
+    subtasks=[
+        color_image5_subtask,
+        depth_image5_subtask,
+    ]
+)
+all_camera_info_notord_val = NotOrderedCallsValidator(
+    subtasks=[
+        color_camera_info5_subtask,
+        depth_camera_info5_subtask,
+    ]
+)
+all_camera_images_with_info_notord_val = NotOrderedCallsValidator(
+    subtasks=[
+        color_image5_subtask,
+        depth_image5_subtask,
+        color_camera_info5_subtask,
+        depth_camera_info5_subtask,
+    ]
+)
+
+joint_states_ord_val = OrderedCallsValidator(subtasks=[joint_states_subtask])
+diagnostics_ord_val = OrderedCallsValidator(subtasks=[diagnostics_subtask])
+
+get_pointcloud_ord_val = OrderedCallsValidator(subtasks=[receive_pointcloud_subtask])
+get_robot_desc_ord_val = OrderedCallsValidator(subtasks=[receive_robot_desc_subtask])
+
+robot_health_val = NotOrderedCallsValidator(
+    subtasks=[diagnostics_subtask, joint_states_subtask, rosout_subtask]
+)
+
+odometry_comparison_val = NotOrderedCallsValidator(
+    subtasks=[odom_subtask, filtered_odom_subtask]
+)
+sensor_data_val = NotOrderedCallsValidator(
+    subtasks=[
+        scan_subtask,
+        receive_pointcloud_subtask,
+        color_image5_subtask,
+        depth_image5_subtask,
+        color_camera_info5_subtask,
+        depth_camera_info5_subtask,
+    ]
+)
+
+
+def get_basic_tasks(
+    extra_tool_calls: List[int] = [0],
+    prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"],
+    n_shots: List[Literal[0, 2, 5]] = [0, 2, 5],
+) -> List[Task]:
+    """Get predefined basic tasks.
+
+    Parameters
+    ----------
+    Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`.
+    See the class documentation for parameter descriptions.
+
+    Returns
+    -------
+    List[Task]
+        One task variant per combination of extra_tool_calls x prompt_detail x n_shots,
+        as described in :func:`~src.rai_bench.rai_bench.tool_calling_agent.predefined.tasks.get_tasks`.
+ """ + tasks: List[Task] = [] + + # Generate all combinations of prompt_detail and n_shots and extra tool calls + for extra_calls in extra_tool_calls: + for detail in prompt_detail: + for shots in n_shots: + task_args = TaskArgs( + extra_tool_calls=extra_calls, + prompt_detail=detail, + examples_in_system_prompt=shots, + ) + + tasks.extend( + [ + GetROS2RGBCameraTask( + validators=[color_image_ord_val], + task_args=task_args, + ), + GetROS2TopicsTask( + validators=[topics_ord_val], + task_args=task_args, + ), + GetROS2DepthCameraTask( + validators=[depth_image_ord_val], + task_args=task_args, + ), + GetAllROS2CamerasTask( + validators=[all_camera_images_notord_val], + task_args=task_args, + ), + GetPointcloudTask( + validators=[get_pointcloud_ord_val], task_args=task_args + ), + GetRobotDescriptionTask( + validators=[get_robot_desc_ord_val], task_args=task_args + ), + CheckRobotHealthTask( + validators=[robot_health_val], + task_args=task_args, + ), + AssessSensorDataQualityTask( + validators=[sensor_data_val], + task_args=task_args, + ), + ] + ) + + return tasks diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/custom_interfaces_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/custom_interfaces_tasks.py new file mode 100644 index 000000000..36f84a31b --- /dev/null +++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/custom_interfaces_tasks.py @@ -0,0 +1,95 @@ +# Copyright (C) 2025 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from typing import List, Literal
+
+from rai_bench.tool_calling_agent.interfaces import (
+    Task,
+    TaskArgs,
+)
+from rai_bench.tool_calling_agent.subtasks import (
+    CheckArgsToolCallSubTask,
+    CheckTopicFieldsToolCallSubTask,
+)
+from rai_bench.tool_calling_agent.tasks.custom_interfaces import (
+    PublishROS2HRIMessageTextTask,
+)
+from rai_bench.tool_calling_agent.validators import (
+    OrderedCallsValidator,
+)
+
+########## SUBTASKS #################################################################
+pub_HRIMessage_text_subtask = CheckTopicFieldsToolCallSubTask(
+    expected_tool_name="publish_ros2_message",
+    expected_topic="/to_human",
+    expected_message_type="rai_interfaces/msg/HRIMessage",
+    expected_fields={"text": "Hello!"},
+)
+
+get_HRIMessage_interface_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="get_ros2_message_interface",
+    expected_args={"msg_type": "rai_interfaces/msg/HRIMessage"},
+)
+
+
+######### VALIDATORS #########################################################################################
+pub_HRIMessage_text_ord_val = OrderedCallsValidator(
+    subtasks=[pub_HRIMessage_text_subtask]
+)
+get_interface_publish_ord_val = OrderedCallsValidator(
+    subtasks=[
+        get_HRIMessage_interface_subtask,
+        pub_HRIMessage_text_subtask,
+    ]
+)
+
+
+def get_custom_interfaces_tasks(
+    extra_tool_calls: List[int] = [0],
+    prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"],
+    n_shots: List[Literal[0, 2, 5]] = [0, 2, 5],
+) -> List[Task]:
+    """Get predefined custom_interfaces tasks.
+
+    Parameters
+    ----------
+    Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`.
+    See the class documentation for parameter descriptions.
+
+    Returns
+    -------
+    List[Task]
+        One task variant per combination of extra_tool_calls x prompt_detail x n_shots,
+        as described in :func:`~src.rai_bench.rai_bench.tool_calling_agent.predefined.tasks.get_tasks`.
+    """
+    tasks: List[Task] = []
+
+    for extra_calls in extra_tool_calls:
+        for detail in prompt_detail:
+            for shots in n_shots:
+                task_args = TaskArgs(
+                    extra_tool_calls=extra_calls,
+                    prompt_detail=detail,
+                    examples_in_system_prompt=shots,
+                )
+                tasks.append(
+                    PublishROS2HRIMessageTextTask(
+                        topic="/to_human",
+                        validators=[
+                            get_interface_publish_ord_val,
+                        ],
+                        task_args=task_args,
+                        text="Hello!",
+                    ),
+                )
+
+    return tasks
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/manipulation_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/manipulation_tasks.py
new file mode 100644
index 000000000..7e4068c8f
--- /dev/null
+++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/manipulation_tasks.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
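+
+# Predefined manipulation tasks: two MoveToPointTask variants ("grab" and
+# "drop") with fixed target coordinates. Each is validated by an ordered
+# check of a single move_to_point tool call with exact arguments.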
+
+from typing import List, Literal
+
+from rai.tools.ros2 import MoveToPointToolInput
+from rai.types import Point
+
+from rai_bench.tool_calling_agent.interfaces import (
+    Task,
+    TaskArgs,
+)
+from rai_bench.tool_calling_agent.subtasks import (
+    CheckArgsToolCallSubTask,
+)
+from rai_bench.tool_calling_agent.tasks.manipulation import (
+    MoveToPointTask,
+)
+from rai_bench.tool_calling_agent.validators import (
+    OrderedCallsValidator,
+)
+
+########## SUBTASKS #################################################################
+move_to_point_subtask_grab = CheckArgsToolCallSubTask(
+    expected_tool_name="move_to_point",
+    expected_args={"x": 1.0, "y": 2.0, "z": 3.0, "task": "grab"},
+)
+move_to_point_subtask_drop = CheckArgsToolCallSubTask(
+    expected_tool_name="move_to_point",
+    expected_args={"x": 1.2, "y": 2.3, "z": 3.4, "task": "drop"},
+)
+
+######### VALIDATORS #########################################################################################
+move_to_point_ord_val_grab = OrderedCallsValidator(
+    subtasks=[move_to_point_subtask_grab]
+)
+move_to_point_ord_val_drop = OrderedCallsValidator(
+    subtasks=[move_to_point_subtask_drop]
+)
+
+
+def get_manipulation_tasks(
+    extra_tool_calls: List[int] = [0],
+    prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"],
+    n_shots: List[Literal[0, 2, 5]] = [0, 2, 5],
+) -> List[Task]:
+    """Get predefined manipulation tasks.
+
+    Parameters
+    ----------
+    Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`.
+    See the class documentation for parameter descriptions.
+
+    Returns
+    -------
+    List[Task]
+        One task variant per combination of extra_tool_calls x prompt_detail x n_shots,
+        as described in :func:`~src.rai_bench.rai_bench.tool_calling_agent.predefined.tasks.get_tasks`.
+    """
+    tasks: List[Task] = []
+
+    objects = {
+        "banana": [Point(x=0.1, y=0.2, z=0.3), Point(x=0.4, y=0.5, z=0.6)],
+        "cube": [Point(x=0.7, y=0.8, z=0.9)],
+    }
+    for extra_calls in extra_tool_calls:
+        for detail in prompt_detail:
+            for shots in n_shots:
+                task_args = TaskArgs(
+                    extra_tool_calls=extra_calls,
+                    prompt_detail=detail,
+                    examples_in_system_prompt=shots,
+                )
+                tasks.extend(
+                    [
+                        MoveToPointTask(
+                            objects=objects,
+                            move_to_tool_input=MoveToPointToolInput(
+                                x=1.0, y=2.0, z=3.0, task="grab"
+                            ),
+                            validators=[move_to_point_ord_val_grab],
+                            task_args=task_args,
+                        ),
+                        MoveToPointTask(
+                            objects=objects,
+                            move_to_tool_input=MoveToPointToolInput(
+                                x=1.2, y=2.3, z=3.4, task="drop"
+                            ),
+                            validators=[move_to_point_ord_val_drop],
+                            task_args=task_args,
+                        ),
+                    ]
+                )
+
+    return tasks
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/navigation_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/navigation_tasks.py
new file mode 100644
index 000000000..4099e5a22
--- /dev/null
+++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/navigation_tasks.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
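+
+# Predefined navigation tasks built on Nav2 actions (/navigate_to_pose,
+# /spin, /drive_on_heading). One task instance is generated per combination
+# of extra_tool_calls x prompt_detail x n_shots passed by the caller.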
+
+from typing import List, Literal
+
+from rai_bench.tool_calling_agent.interfaces import (
+    Task,
+    TaskArgs,
+)
+from rai_bench.tool_calling_agent.subtasks import (
+    CheckActionFieldsToolCallSubTask,
+)
+from rai_bench.tool_calling_agent.tasks.navigation import (
+    MoveToBedTask,
+    MoveToFrontTask,
+    NavigateToPointTask,
+    SpinAroundTask,
+)
+from rai_bench.tool_calling_agent.validators import (
+    OrderedCallsValidator,
+)
+
+########## SUBTASKS #################################################################
+
+start_nav_action_subtask = CheckActionFieldsToolCallSubTask(
+    expected_tool_name="start_ros2_action",
+    expected_action="/navigate_to_pose",
+    expected_action_type="nav2_msgs/action/NavigateToPose",
+    expected_fields={
+        "pose": {
+            "header": {"frame_id": "map"},
+            "pose": {
+                "position": {"x": 2.0, "y": 2.0, "z": 0.0},
+            },
+        },
+    },
+)
+start_spin_action_subtask = CheckActionFieldsToolCallSubTask(
+    expected_tool_name="start_ros2_action",
+    expected_action="/spin",
+    expected_action_type="nav2_msgs/action/Spin",
+    expected_fields={"target_yaw": 3},
+)
+start_move_front_action_subtask = CheckActionFieldsToolCallSubTask(
+    expected_tool_name="start_ros2_action",
+    expected_action="/drive_on_heading",
+    expected_action_type="nav2_msgs/action/DriveOnHeading",
+    expected_fields={
+        "target": {"y": 0.0, "z": 0.0},
+    },
+)
+######### VALIDATORS #########################################################################################
+start_navigate_action_ord_val = OrderedCallsValidator(
+    subtasks=[start_nav_action_subtask]
+)
+start_spin_action_ord_val = OrderedCallsValidator(subtasks=[start_spin_action_subtask])
+move_ahead_ord_val = OrderedCallsValidator(subtasks=[start_move_front_action_subtask])
+
+
+def get_navigation_tasks(
+    extra_tool_calls: List[int] = [0],
+    prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"],
+    n_shots: List[Literal[0, 2, 5]] = [0, 2, 5],
+) -> List[Task]:
+    """Get predefined navigation tasks.
+
+    Parameters
+    ----------
+    Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`.
+    See the class documentation for parameter descriptions.
+
+    Returns
+    -------
+    List[Task]
+        One task variant per combination of extra_tool_calls x prompt_detail x n_shots,
+        as described in :func:`~src.rai_bench.rai_bench.tool_calling_agent.predefined.tasks.get_tasks`.
+    """
+    tasks: List[Task] = []
+
+    for extra_calls in extra_tool_calls:
+        for detail in prompt_detail:
+            for shots in n_shots:
+                task_args = TaskArgs(
+                    extra_tool_calls=extra_calls,
+                    prompt_detail=detail,
+                    examples_in_system_prompt=shots,
+                )
+                tasks.extend(
+                    [
+                        NavigateToPointTask(
+                            validators=[start_navigate_action_ord_val],
+                            task_args=task_args,
+                        ),
+                        SpinAroundTask(
+                            validators=[start_spin_action_ord_val],
+                            task_args=task_args,
+                        ),
+                        MoveToBedTask(
+                            validators=[move_ahead_ord_val],
+                            task_args=task_args,
+                        ),
+                        MoveToFrontTask(
+                            validators=[move_ahead_ord_val],
+                            task_args=task_args,
+                        ),
+                    ]
+                )
+
+    return tasks
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/spatial_reasoning_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/spatial_reasoning_tasks.py
new file mode 100644
index 000000000..f67d19792
--- /dev/null
+++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/spatial_reasoning_tasks.py
@@ -0,0 +1,278 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Literal, Sequence
+
+from rai_bench.tool_calling_agent.interfaces import (
+    Task,
+    TaskArgs,
+)
+from rai_bench.tool_calling_agent.subtasks import (
+    CheckArgsToolCallSubTask,
+)
+from rai_bench.tool_calling_agent.tasks.spatial import (
+    BoolImageTaskEasy,
+    BoolImageTaskHard,
+    BoolImageTaskInput,
+    BoolImageTaskMedium,
+)
+from rai_bench.tool_calling_agent.validators import (
+    OrderedCallsValidator,
+)
+
+IMG_PATH = "src/rai_bench/rai_bench/tool_calling_agent/predefined/images/"
+true_response_inputs: List[BoolImageTaskInput] = [
+    BoolImageTaskInput(
+        question="Is the door on the left from the desk?",
+        images_paths=[IMG_PATH + "image_1.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is the light on in the room?",
+        images_paths=[IMG_PATH + "image_2.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Do you see the plant?",
+        images_paths=[IMG_PATH + "image_2.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Are there any pictures on the wall?",
+        images_paths=[IMG_PATH + "image_3.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Are there 3 pictures on the wall?",
+        images_paths=[IMG_PATH + "image_4.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is there a plant behind the rack?",
+        images_paths=[IMG_PATH + "image_5.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is there a pillow on the armchair?",
+        images_paths=[IMG_PATH + "image_7.jpg"],
+    ),
+]
+false_response_inputs: List[BoolImageTaskInput] = [
+    BoolImageTaskInput(
+        question="Is the door open?",
+        images_paths=[IMG_PATH + "image_1.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is someone in the room?",
+        images_paths=[IMG_PATH + "image_1.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Do you see the plant?",
+        images_paths=[IMG_PATH + "image_3.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Are there 4 pictures on the wall?",
+        images_paths=[IMG_PATH + "image_4.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is there a rack on the left from the sofa?",
+        images_paths=[IMG_PATH + "image_4.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is there a plant on the right from the window?",
+        images_paths=[IMG_PATH + "image_6.jpg"],
+    ),
+    BoolImageTaskInput(
+        question="Is there a red pillow on the armchair?",
+        images_paths=[IMG_PATH + "image_7.jpg"],
+    ),
+]
+########## SUBTASKS #################################################################
+return_true_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="return_bool_response", expected_args={"response": True}
+)
+return_false_subtask = CheckArgsToolCallSubTask(
+    expected_tool_name="return_bool_response", expected_args={"response": False}
+)
+
+######### VALIDATORS #########################################################################################
+ret_true_ord_val = OrderedCallsValidator(subtasks=[return_true_subtask])
+ret_false_ord_val = OrderedCallsValidator(subtasks=[return_false_subtask])
+
+
+def get_spatial_tasks(
+    extra_tool_calls: List[int] = [0],
+    prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"],
+    n_shots: List[Literal[0, 2, 5]] = [0, 2, 5],
+) -> Sequence[Task]:
+    """Get predefined spatial reasoning tasks.
+
+    Parameters
+    ----------
+    Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`.
+    See the class documentation for parameter descriptions.
+
+    Returns
+    -------
+    Sequence[Task]
+        One task variant per combination of extra_tool_calls x prompt_detail x n_shots,
+        as described in :func:`~src.rai_bench.rai_bench.tool_calling_agent.predefined.tasks.get_tasks`.
+    """
+    tasks: List[Task] = []
+
+    # Categorize tasks by complexity based on question difficulty
+    easy_true_inputs = [
+        # Single object presence/detection
+        BoolImageTaskInput(
+            question="Is the light on in the room?",
+            images_paths=[IMG_PATH + "image_2.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Do you see the plant?", images_paths=[IMG_PATH + "image_2.jpg"]
+        ),
+        BoolImageTaskInput(
+            question="Are there any pictures on the wall?",
+            images_paths=[IMG_PATH + "image_3.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is there a pillow on the armchair?",
+            images_paths=[IMG_PATH + "image_7.jpg"],
+        ),
+    ]
+
+    medium_true_inputs = [
+        # Object state or counting
+        BoolImageTaskInput(
+            question="Are there 3 pictures on the wall?",
+            images_paths=[IMG_PATH + "image_4.jpg"],
+        ),
+    ]
+
+    hard_true_inputs = [
+        # Spatial relationships between objects
+        BoolImageTaskInput(
+            question="Is the door on the left from the desk?",
+            images_paths=[IMG_PATH + "image_1.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is there a plant behind the rack?",
+            images_paths=[IMG_PATH + "image_5.jpg"],
+        ),
+    ]
+
+    easy_false_inputs = [
+        # Single object presence/detection
+        BoolImageTaskInput(
+            question="Is someone in the room?", images_paths=[IMG_PATH + "image_1.jpg"]
+        ),
+        BoolImageTaskInput(
+            question="Do you see the plant?", images_paths=[IMG_PATH + "image_3.jpg"]
+        ),
+        BoolImageTaskInput(
+            question="Is there a red pillow on the armchair?",
+            images_paths=[IMG_PATH + "image_7.jpg"],
+        ),
+    ]
+
+    medium_false_inputs = [
+        # Object state or counting
+        BoolImageTaskInput(
+            question="Is the door open?", images_paths=[IMG_PATH + "image_1.jpg"]
+        ),
+        BoolImageTaskInput(
+            question="Are there 4 pictures on the wall?",
+            images_paths=[IMG_PATH + "image_4.jpg"],
+        ),
+    ]
+
+    hard_false_inputs = [
+        # Spatial relationships between objects
+        BoolImageTaskInput(
+            question="Is there a rack on the left from the sofa?",
+            images_paths=[IMG_PATH + "image_4.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is there a plant on the right from the window?",
+            images_paths=[IMG_PATH + "image_6.jpg"],
+        ),
+    ]
+
+    for extra_calls in extra_tool_calls:
+        for detail in prompt_detail:
+            for shots in n_shots:
+                task_args = TaskArgs(
+                    extra_tool_calls=extra_calls,
+                    prompt_detail=detail,
+                    examples_in_system_prompt=shots,
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskEasy(
+                            task_input=input_item,
+                            validators=[ret_true_ord_val],
+                            task_args=task_args,
+                        )
+                        for input_item in easy_true_inputs
+                    ]
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskEasy(
+                            task_input=input_item,
+                            validators=[ret_false_ord_val],
+                            task_args=task_args,
+                        )
+                        for input_item in easy_false_inputs
+                    ]
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskMedium(
+                            task_input=input_item,
+                            validators=[ret_true_ord_val],
+                            task_args=task_args,
+                        )
+                        for input_item in medium_true_inputs
+                    ]
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskMedium(
+                            task_input=input_item,
+                            validators=[ret_false_ord_val],
+                            task_args=task_args,
+                        )
+                        for input_item in medium_false_inputs
+                    ]
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskHard(
+                            task_input=input_item,
+                            validators=[ret_true_ord_val],
+                            task_args=task_args,
+                        )
+                        for input_item in hard_true_inputs
+                    ]
+                )
+
+                tasks.extend(
+                    [
+                        BoolImageTaskHard(
+ task_input=input_item, + validators=[ret_false_ord_val], + task_args=task_args, + ) + for input_item in hard_false_inputs + ] + ) + + return tasks diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/tasks.py index cdeefc8be..f3a166c8a 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/predefined/tasks.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/predefined/tasks.py @@ -13,407 +13,25 @@ # limitations under the License. import random -from typing import List, Literal, Sequence - -from rai.tools.ros2 import MoveToPointToolInput +from typing import List, Literal from rai_bench.tool_calling_agent.interfaces import ( Task, ) -from rai_bench.tool_calling_agent.subtasks import ( - CheckActionFieldsToolCallSubTask, - CheckArgsToolCallSubTask, - CheckTopicFieldsToolCallSubTask, -) -from rai_bench.tool_calling_agent.tasks.basic import ( - GetAllROS2DepthCamerasTask, - GetAllROS2RGBCamerasTask, - GetROS2DepthCameraTask, - GetROS2RGBCameraTask, - GetROS2TopicsTask, -) -from rai_bench.tool_calling_agent.tasks.custom_interfaces import ( - PublishROS2HRIMessageTextTask, -) -from rai_bench.tool_calling_agent.tasks.manipulation import ( - MoveToPointTask, -) -from rai_bench.tool_calling_agent.tasks.navigation import ( - MoveToBedTask, - MoveToFrontTask, - NavigateToPointTask, - SpinAroundTask, -) -from rai_bench.tool_calling_agent.tasks.spatial import ( - BoolImageTask, - BoolImageTaskInput, -) -from rai_bench.tool_calling_agent.validators import ( - NotOrderedCallsValidator, - OrderedCallsValidator, -) - -IMG_PATH = "src/rai_bench/rai_bench/tool_calling_agent/predefined/images/" -true_response_inputs: List[BoolImageTaskInput] = [ - BoolImageTaskInput( - question="Is the door on the left from the desk?", - images_paths=[IMG_PATH + "image_1.jpg"], - ), - BoolImageTaskInput( - question="Is the light on in the room?", - images_paths=[IMG_PATH + "image_2.jpg"], - ), - BoolImageTaskInput( - question="Do you see the plant?", - images_paths=[IMG_PATH + "image_2.jpg"], - ), - BoolImageTaskInput( - question="Are there any pictures on the wall?", - images_paths=[IMG_PATH + "image_3.jpg"], - ), - BoolImageTaskInput( - question="Are there 3 pictures on the wall?", - images_paths=[IMG_PATH + "image_4.jpg"], - ), - BoolImageTaskInput( - question="Is there a plant behind the rack?", - images_paths=[IMG_PATH + "image_5.jpg"], - ), - BoolImageTaskInput( - question="Is there a pillow on the armchain?", - images_paths=[IMG_PATH + "image_7.jpg"], - ), -] -false_response_inputs: List[BoolImageTaskInput] = [ - BoolImageTaskInput( - question="Is the door open?", - images_paths=[IMG_PATH + "image_1.jpg"], - ), - BoolImageTaskInput( - question="Is someone in the room?", - images_paths=[IMG_PATH + "image_1.jpg"], - ), - BoolImageTaskInput( - question="Do you see the plant?", - images_paths=[IMG_PATH + "image_3.jpg"], - ), - BoolImageTaskInput( - question="Are there 4 pictures on the wall?", - images_paths=[IMG_PATH + "image_4.jpg"], - ), - BoolImageTaskInput( - question="Is there a rack on the left from the sofa?", - images_paths=[IMG_PATH + "image_4.jpg"], - ), - BoolImageTaskInput( - question="Is there a plant on the right from the window?", - images_paths=[IMG_PATH + "image_6.jpg"], - ), - BoolImageTaskInput( - question="Is there a red pillow on the armchair?", - images_paths=[IMG_PATH + "image_7.jpg"], - ), -] -########## SUBTASKS ####################################################################################### -get_topics_subtask 
= CheckArgsToolCallSubTask( - expected_tool_name="get_ros2_topics_names_and_types", expected_args={} -) - -color_image5_subtask = CheckArgsToolCallSubTask( - expected_tool_name="get_ros2_image", - expected_args={"topic": "/color_image5"}, - expected_optional_args={"timeout_sec": int}, -) -depth_image5_subtask = CheckArgsToolCallSubTask( - expected_tool_name="get_ros2_image", - expected_args={"topic": "/depth_image5"}, - expected_optional_args={"timeout_sec": int}, -) - -receive_robot_desc_subtask = CheckArgsToolCallSubTask( - expected_tool_name="receive_ros2_message", - expected_args={"topic": "/robot_description"}, - expected_optional_args={"timeout_sec": int}, -) - -move_to_point_subtask_grab = CheckArgsToolCallSubTask( - expected_tool_name="move_to_point", - expected_args={"x": 1.0, "y": 2.0, "z": 3.0, "task": "grab"}, -) -move_to_point_subtask_drop = CheckArgsToolCallSubTask( - expected_tool_name="move_to_point", - expected_args={"x": 1.2, "y": 2.3, "z": 3.4, "task": "drop"}, +from rai_bench.tool_calling_agent.predefined import ( + get_basic_tasks, + get_custom_interfaces_tasks, + get_manipulation_tasks, + get_navigation_tasks, + get_spatial_tasks, ) -pub_HRIMessage_text_subtask = CheckTopicFieldsToolCallSubTask( - expected_tool_name="publish_ros2_message", - expected_topic="/to_human", - expected_message_type="rai_interfaces/msg/HRIMessage", - expected_fields={"text": "Hello!"}, -) - -get_tohuman_interface_subtask = CheckArgsToolCallSubTask( - expected_tool_name="get_ros2_message_interface", - expected_args={"msg_type": "rai_interfaces/msg/HRIMessage"}, -) - - -return_true_subtask = CheckArgsToolCallSubTask( - expected_tool_name="return_bool_response", expected_args={"response": True} -) -return_false_subtask = CheckArgsToolCallSubTask( - expected_tool_name="return_bool_response", expected_args={"response": False} -) - -start_nav_action_subtask = CheckActionFieldsToolCallSubTask( - expected_tool_name="start_ros2_action", - expected_action="/navigate_to_pose", - expected_action_type="nav2_msgs/action/NavigateToPose", - expected_fields={ - "pose": { - "header": {"frame_id": "map"}, - "pose": { - "position": {"x": 2.0, "y": 2.0, "z": 0.0}, - }, - }, - }, -) -start_spin_action_subtask = CheckActionFieldsToolCallSubTask( - expected_tool_name="start_ros2_action", - expected_action="/spin", - expected_action_type="nav2_msgs/action/Spin", - expected_fields={"target_yaw": 3}, -) -start_move_front_action_subtask = CheckActionFieldsToolCallSubTask( - expected_tool_name="start_ros2_action", - expected_action="/drive_on_heading", - expected_action_type="nav2_msgs/action/DriveOnHeading", - expected_fields={ - "target": {"y": 0.0, "z": 0.0}, - }, -) -######### VALIDATORS ######################################################################################### -topics_ord_val = OrderedCallsValidator(subtasks=[get_topics_subtask]) -topics_and_color_image_ord_val = OrderedCallsValidator( - subtasks=[ - get_topics_subtask, - color_image5_subtask, - ] -) -color_image_ord_val = OrderedCallsValidator(subtasks=[color_image5_subtask]) -depth_image_ord_val = OrderedCallsValidator(subtasks=[depth_image5_subtask]) -all_color_images_notord_val = NotOrderedCallsValidator( - subtasks=[color_image5_subtask, color_image5_subtask] -) -all_depth_images_notord_val = NotOrderedCallsValidator( - subtasks=[depth_image5_subtask, depth_image5_subtask] -) - -move_to_point_ord_val_grab = OrderedCallsValidator( - subtasks=[move_to_point_subtask_grab] -) -move_to_point_ord_val_drop = OrderedCallsValidator( - 
subtasks=[move_to_point_subtask_drop] -) - -pub_HRIMessage_text_ord_val = OrderedCallsValidator( - subtasks=[pub_HRIMessage_text_subtask] -) - -list_topic_get_interface_publish_ord_val = OrderedCallsValidator( - subtasks=[ - get_topics_subtask, - get_tohuman_interface_subtask, - pub_HRIMessage_text_subtask, - ] -) - -ret_true_ord_val = OrderedCallsValidator(subtasks=[return_true_subtask]) -ret_false_ord_val = OrderedCallsValidator(subtasks=[return_false_subtask]) - -start_navigate_action_ord_val = OrderedCallsValidator( - subtasks=[start_nav_action_subtask] -) -start_spin_action_ord_val = OrderedCallsValidator(subtasks=[start_spin_action_subtask]) -move_ahead_ord_val = OrderedCallsValidator(subtasks=[start_move_front_action_subtask]) - -######### TASKS ############################################################################################ -basic_tasks: List[Task] = [ - # 3 options to validate same task: - # most strict, agent has to call both tool correctly to pass this validator - GetROS2RGBCameraTask(validators=[topics_and_color_image_ord_val]), - # verifing only if the GetCameraImage call was made properly - GetROS2RGBCameraTask(validators=[color_image_ord_val]), - # Soft verification. verifing in separate vaidators the list topic and get image. - # agent can get 0.5 score by only calling list topics - GetROS2RGBCameraTask(validators=[topics_ord_val, color_image_ord_val]), - # we can also add extra tool calls to allow model to correct itself - GetROS2RGBCameraTask( - validators=[topics_ord_val, color_image_ord_val], extra_tool_calls=3 - ), - GetROS2TopicsTask(validators=[topics_ord_val]), - GetROS2DepthCameraTask(validators=[depth_image_ord_val]), - GetAllROS2RGBCamerasTask(validators=[all_color_images_notord_val]), - GetAllROS2DepthCamerasTask(validators=[all_depth_images_notord_val]), -] -manipulation_tasks: List[Task] = [ - MoveToPointTask( - move_to_tool_input=MoveToPointToolInput(x=1.0, y=2.0, z=3.0, task="grab"), - validators=[move_to_point_ord_val_drop], - ), - MoveToPointTask( - move_to_tool_input=MoveToPointToolInput(x=1.2, y=2.3, z=3.4, task="drop"), - validators=[move_to_point_ord_val_drop], - ), -] - -custom_interfaces_tasks: List[Task] = [ - PublishROS2HRIMessageTextTask( - topic="/to_human", - text="Hello!", - validators=[pub_HRIMessage_text_ord_val], - extra_tool_calls=2, - ), - PublishROS2HRIMessageTextTask( - topic="/to_human", - text="Hello!", - validators=[list_topic_get_interface_publish_ord_val], - ), -] - -true_spatial_tasks: List[Task] = [ - BoolImageTask( - task_input=input_item, validators=[ret_true_ord_val], extra_tool_calls=0 - ) - for input_item in true_response_inputs -] -false_spatial_tasks: List[Task] = [ - BoolImageTask(task_input=input_item, validators=[ret_false_ord_val]) - for input_item in false_response_inputs -] - -navigation_tasks: List[Task] = [ - NavigateToPointTask(validators=[start_navigate_action_ord_val], extra_tool_calls=5), - SpinAroundTask(validators=[start_spin_action_ord_val], extra_tool_calls=5), - MoveToBedTask(validators=[move_ahead_ord_val], extra_tool_calls=5), - MoveToFrontTask(validators=[move_ahead_ord_val], extra_tool_calls=5), -] - - -def get_basic_tasks(extra_tool_calls: int = 0) -> List[Task]: - return [ - # 3 options to validate same task: - # most strict, agent has to call both tool correctly to pass this validator - GetROS2RGBCameraTask( - validators=[topics_and_color_image_ord_val], - extra_tool_calls=extra_tool_calls, - ), - # verifing only if the GetCameraImage call was made properly - GetROS2RGBCameraTask( - 
validators=[color_image_ord_val], extra_tool_calls=extra_tool_calls - ), - # Soft verification. verifing in separate vaidators the list topic and get image. - # agent can get 0.5 score by only calling list topics - GetROS2RGBCameraTask( - validators=[topics_ord_val, color_image_ord_val], - extra_tool_calls=extra_tool_calls, - ), - # we can also add extra tool calls to allow model to correct itself - GetROS2RGBCameraTask( - validators=[topics_ord_val, color_image_ord_val], - extra_tool_calls=extra_tool_calls, - ), - GetROS2TopicsTask( - validators=[topics_ord_val], extra_tool_calls=extra_tool_calls - ), - GetROS2DepthCameraTask( - validators=[depth_image_ord_val], extra_tool_calls=extra_tool_calls - ), - GetAllROS2RGBCamerasTask( - validators=[all_color_images_notord_val], extra_tool_calls=extra_tool_calls - ), - GetAllROS2DepthCamerasTask( - validators=[all_depth_images_notord_val], extra_tool_calls=extra_tool_calls - ), - ] - - -def get_navigation_tasks(extra_tool_calls: int = 0) -> List[Task]: - return [ - NavigateToPointTask( - validators=[start_navigate_action_ord_val], - extra_tool_calls=extra_tool_calls, - ), - SpinAroundTask( - validators=[start_spin_action_ord_val], - extra_tool_calls=extra_tool_calls, - ), - MoveToBedTask( - validators=[move_ahead_ord_val], - extra_tool_calls=extra_tool_calls, - ), - MoveToFrontTask( - validators=[move_ahead_ord_val], - extra_tool_calls=extra_tool_calls, - ), - ] - - -def get_manipulation_tasks(extra_tool_calls: int = 0) -> List[Task]: - return [ - MoveToPointTask( - move_to_tool_input=MoveToPointToolInput(x=1.0, y=2.0, z=3.0, task="grab"), - validators=[move_to_point_ord_val_grab], - extra_tool_calls=extra_tool_calls, - ), - MoveToPointTask( - move_to_tool_input=MoveToPointToolInput(x=1.2, y=2.3, z=3.4, task="drop"), - validators=[move_to_point_ord_val_drop], - extra_tool_calls=extra_tool_calls, - ), - ] - - -def get_custom_interfaces_tasks(extra_tool_calls: int = 0) -> List[Task]: - return [ - PublishROS2HRIMessageTextTask( - topic="/to_human", - text="Hello!", - validators=[pub_HRIMessage_text_ord_val], - extra_tool_calls=extra_tool_calls, - ), - PublishROS2HRIMessageTextTask( - topic="/to_human", - text="Hello!", - validators=[list_topic_get_interface_publish_ord_val], - extra_tool_calls=extra_tool_calls, - ), - ] - - -def get_spatial_tasks(extra_tool_calls: int = 0) -> Sequence[Task]: - true_tasks = [ - BoolImageTask( - task_input=input_item, - validators=[ret_true_ord_val], - extra_tool_calls=extra_tool_calls, - ) - for input_item in true_response_inputs - ] - false_tasks = [ - BoolImageTask( - task_input=input_item, - validators=[ret_false_ord_val], - extra_tool_calls=extra_tool_calls, - ) - for input_item in false_response_inputs - ] - return true_tasks + false_tasks - def get_tasks( - extra_tool_calls: int = 0, + extra_tool_calls: List[int] = [0], complexities: List[Literal["easy", "medium", "hard"]] = ["easy", "medium", "hard"], + prompt_detail: List[Literal["brief", "descriptive"]] = ["brief", "descriptive"], + n_shots: List[Literal[0, 2, 5]] = [0, 2, 5], task_types: List[ Literal[ "basic", @@ -430,18 +48,58 @@ def get_tasks( "spatial_reasoning", ], ) -> List[Task]: - # TODO (jmatejcz) implement complexity sorting - tasks: List[Task] = [] + """Get a list of tasks based on the provided configuration. + + Parameters + ---------- + Parameters match :class:`~src.rai_bench.rai_bench.test_models.ToolCallingAgentBenchmarkConfig`. + See the class documentation for parameter descriptions. 
+
+
+    Returns
+    -------
+    List[Task]
+        Shuffled list of tasks of the requested types, filtered by complexity.
+        Every combination of extra_tool_calls x prompt_detail x n_shots is generated per task.
+    """
+
+    all_tasks: List[Task] = []
     if "basic" in task_types:
-        tasks += get_basic_tasks(extra_tool_calls=extra_tool_calls)
+        all_tasks += get_basic_tasks(
+            extra_tool_calls=extra_tool_calls,
+            prompt_detail=prompt_detail,
+            n_shots=n_shots,
+        )
     if "custom_interfaces" in task_types:
-        tasks += get_custom_interfaces_tasks(extra_tool_calls=extra_tool_calls)
+        all_tasks += get_custom_interfaces_tasks(
+            extra_tool_calls=extra_tool_calls,
+            prompt_detail=prompt_detail,
+            n_shots=n_shots,
+        )
     if "manipulation" in task_types:
-        tasks += get_manipulation_tasks(extra_tool_calls=extra_tool_calls)
+        all_tasks += get_manipulation_tasks(
+            extra_tool_calls=extra_tool_calls,
+            prompt_detail=prompt_detail,
+            n_shots=n_shots,
+        )
     if "navigation" in task_types:
-        tasks += get_navigation_tasks(extra_tool_calls=extra_tool_calls)
+        all_tasks += get_navigation_tasks(
+            extra_tool_calls=extra_tool_calls,
+            prompt_detail=prompt_detail,
+            n_shots=n_shots,
+        )
     if "spatial_reasoning" in task_types:
-        tasks += get_spatial_tasks(extra_tool_calls=extra_tool_calls)
+        all_tasks += get_spatial_tasks(
+            extra_tool_calls=extra_tool_calls,
+            prompt_detail=prompt_detail,
+            n_shots=n_shots,
+        )
+
+    filtered_tasks: List[Task] = []
+    for task in all_tasks:
+        if task.complexity not in complexities:
+            continue
+
+        filtered_tasks.append(task)
 
-    random.shuffle(tasks)
-    return tasks
+    random.shuffle(filtered_tasks)
+    return filtered_tasks
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/results_tracking.py b/src/rai_bench/rai_bench/tool_calling_agent/results_tracking.py
index 7d6a1a1d9..ba0034fa1 100644
--- a/src/rai_bench/rai_bench/tool_calling_agent/results_tracking.py
+++ b/src/rai_bench/rai_bench/tool_calling_agent/results_tracking.py
@@ -41,8 +41,12 @@ class ValidatorResult(BaseModel):
 
 
 class TaskResult(BaseModel):
-    task_prompt: str = Field(..., description="The task prompt.")
+    task_prompt: str = Field(..., description="The task base prompt.")
     system_prompt: str = Field(..., description="The system prompt.")
+    examples_in_system_prompt: int = Field(
+        ..., description="Number of examples on how to use tools in the system prompt."
+    )
+    prompt_detail: str = Field(..., description="How detailed the task prompt is.")
     complexity: str = Field(..., description="Complexity of the task.")
     type: str = Field(..., description="Type of task, for example: manipulation")
     model_name: str = Field(..., description="Name of the LLM.")
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/basic.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/basic.py
index 511cf2c93..9eb1a159c 100644
--- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/basic.py
+++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/basic.py
@@ -14,328 +14,206 @@
 import logging
 from abc import ABC
-from typing import Dict, List
+from typing import List
 
-import inflect
 from langchain_core.tools import BaseTool
-from rai.types import Point
 
 from rai_bench.tool_calling_agent.interfaces import (
     Task,
-    Validator,
 )
+from rai_bench.tool_calling_agent.mocked_ros2_interfaces import COMMON_TOPICS_AND_TYPES
 from rai_bench.tool_calling_agent.mocked_tools import (
-    MockGetObjectPositionsTool,
    MockGetROS2ImageTool,
     MockGetROS2TopicsNamesAndTypesTool,
     MockReceiveROS2MessageTool,
 )
 
 loggers_type = logging.Logger
+PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT = """You are a ROS 2 expert 
that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. +Be proactive and use the tools to answer questions.""" - -PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT = """You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. -Be proactive and use the tools to answer questions. +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + + """ Example of tool calls: - get_ros2_message_interface, args: {'msg_type': 'geometry_msgs/msg/Twist'} -- publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}} -- get_ros2_message_interface, args: {'msg_type': 'turtlesim/srv/TeleportAbsolute'} +- publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}}""" +) + +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + + """ +- get_ros2_topics_names_and_types, args: {} +- get_ros2_image, args: {'topic': '/camera/image_raw', 'timeout_sec': 10} - publish_ros2_message, args: {'topic': '/turtle1/teleport_absolute', 'message_type': 'turtlesim/srv/TeleportAbsolute', 'message': {x: 5.0, y: 2.0, theta: 1.57}}""" +) -CAMERA_TOPICS_AND_TYPES = [ - "topic: /color_camera_info5\ntype: sensor_msgs/msg/CameraInfo\n", - "topic: /color_image5\ntype: sensor_msgs/msg/Image\n", - "topic: /depth_camera_info5\ntype: sensor_msgs/msg/CameraInfo\n", - "topic: /depth_image5\ntype: sensor_msgs/msg/Image\n", -] -CAMERA_TOPICS = [ - "/color_camera_info5", - "/color_image5", - "/depth_camera_info5", - "/depth_image5", +TOPIC_STRINGS = [ + f"topic: {topic}\ntype: {topic_type}\n" + for topic, topic_type in COMMON_TOPICS_AND_TYPES.items() ] class BasicTask(Task, ABC): - @property - def type(self) -> str: - return "basic" - - def get_system_prompt(self) -> str: - return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT - - -class GetROS2TopicsTask(BasicTask): - complexity = "easy" + type = "basic" @property def available_tools(self) -> List[BaseTool]: return [ MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - "topic: /display_contacts\ntype: visualization_msgs/msg/MarkerArray\n", - "topic: /display_planned_path\ntype: moveit_msgs/msg/DisplayTrajectory\n", - "topic: /execute_trajectory/_action/feedback\ntype: moveit_msgs/action/ExecuteTrajectory_FeedbackMessage\n", - "topic: /execute_trajectory/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /joint_states\ntype: sensor_msgs/msg/JointState\n", - "topic: /monitored_planning_scene\ntype: moveit_msgs/msg/PlanningScene\n", - "topic: /motion_plan_request\ntype: moveit_msgs/msg/MotionPlanRequest\n", - "topic: /move_action/_action/feedback\ntype: moveit_msgs/action/MoveGroup_FeedbackMessage\n", - "topic: /move_action/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /panda_arm_controller/follow_joint_trajectory/_action/feedback\ntype: control_msgs/action/FollowJointTrajectory_FeedbackMessage\n", - "topic: /panda_arm_controller/follow_joint_trajectory/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: 
/panda_hand_controller/gripper_cmd/_action/feedback\ntype: control_msgs/action/GripperCommand_FeedbackMessage\n", - "topic: /panda_hand_controller/gripper_cmd/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /parameter_events\ntype: rcl_interfaces/msg/ParameterEvent\n", - "topic: /planning_scene\ntype: moveit_msgs/msg/PlanningScene\n", - "topic: /planning_scene_world\ntype: moveit_msgs/msg/PlanningSceneWorld\n", - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /robot_description_semantic\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /tf_static\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /trajectory_execution_event\ntype: std_msgs/msg/String\n", - ] - + CAMERA_TOPICS_AND_TYPES - ) + mock_topics_names_and_types=TOPIC_STRINGS + ), + MockGetROS2ImageTool(available_topics=list(COMMON_TOPICS_AND_TYPES.keys())), + MockReceiveROS2MessageTool( + available_topics=list(COMMON_TOPICS_AND_TYPES.keys()) + ), ] - def get_prompt(self) -> str: - return "Get the names and types of all ROS2 topics" - - -class GetROS2TopicsTask2(BasicTask): - complexity = "easy" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - ] - + CAMERA_TOPICS_AND_TYPES - ) - ] + def optional_tool_calls_number(self) -> int: + # Listing topics before getting any message + return 1 - def get_prompt(self) -> str: - return "What is in the ROS2 network?" + def get_system_prompt(self) -> str: + if self.n_shots == 0: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + elif self.n_shots == 2: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + else: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT -class GetROS2RGBCameraTask(BasicTask): +class GetROS2TopicsTask(BasicTask): complexity = "easy" @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - ] - + CAMERA_TOPICS_AND_TYPES - ), - MockGetROS2ImageTool(available_topics=CAMERA_TOPICS), - ] + def optional_tool_calls_number(self) -> int: + return 0 + + def get_base_prompt(self) -> str: + return "Get all topics" def get_prompt(self) -> str: - return "Get the RGB image from the camera." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} available in the ROS2 system with their names and message types. " + "You can discover what topics are currently active." 
+ ) -class GetROS2DepthCameraTask(BasicTask): +class GetROS2RGBCameraTask(BasicTask): complexity = "easy" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /camera_image_color\ntype: sensor_msgs/msg/Image\n", - "topic: /camera_image_depth\ntype: sensor_msgs/msg/Image\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - ] - + CAMERA_TOPICS_AND_TYPES - ), - MockGetROS2ImageTool(available_topics=CAMERA_TOPICS), - ] + def get_base_prompt(self) -> str: + return "Get RGB camera image." def get_prompt(self) -> str: - return "Get the depth image from the camera." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can explore available camera topics and capture the RGB color image." + ) -class GetAllROS2RGBCamerasTask(BasicTask): +class GetROS2DepthCameraTask(BasicTask): complexity = "easy" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - ] - + CAMERA_TOPICS_AND_TYPES - ), - MockGetROS2ImageTool(available_topics=CAMERA_TOPICS), - ] + def get_base_prompt(self) -> str: + return "Get depth camera image." def get_prompt(self) -> str: - return "Get RGB images from all of the available cameras." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can explore available camera topics and capture the depth image data." + ) -class GetAllROS2DepthCamerasTask(BasicTask): +class GetPointcloudTask(BasicTask): complexity = "easy" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - ] - + CAMERA_TOPICS_AND_TYPES - ), - MockGetROS2ImageTool(available_topics=CAMERA_TOPICS), - ] + def get_base_prompt(self) -> str: + return "Get the pointcloud data." def get_prompt(self) -> str: - return "Get depth images from all of the available cameras." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can discover available sensor topics and receive the pointcloud information." + ) -# NOTE (jm) is this task redundant? -class GetROS2MessageTask(BasicTask): +class GetRobotDescriptionTask(BasicTask): complexity = "easy" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - ] - + CAMERA_TOPICS_AND_TYPES - ), - MockReceiveROS2MessageTool(available_topics=CAMERA_TOPICS), - ] - - def get_system_prompt(self) -> str: - return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT + def get_base_prompt(self) -> str: + return "Get robot description." 
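+    # Hypothetical construction sketch (validator wiring omitted); TaskArgs is
+    # the user-facing config model from rai_bench.tool_calling_agent.interfaces:
+    #   GetRobotDescriptionTask(
+    #       validators=[...],
+    #       task_args=TaskArgs(prompt_detail="descriptive"),
+    #   )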
def get_prompt(self) -> str: - return "Get RGB image." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} You can explore the system " + "to find robot description data." + ) -class GetRobotDescriptionTask(BasicTask): - complexity = "easy" +class GetAllROS2CamerasTask(BasicTask): + complexity = "medium" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /tf_static\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /trajectory_execution_event\ntype: std_msgs/msg/String\n", - ] - ), - MockReceiveROS2MessageTool(available_topics=["/robot_description"]), - ] + def get_base_prompt(self) -> str: + return "Get all camera images" def get_prompt(self) -> str: - return "Give me description of the robot." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} from all available camera sources in the system. " + "This includes both RGB color images and depth images. " + "You can discover what camera topics are available and capture images from each." + ) -class GetPointcloudTask(BasicTask): - complexity = "easy" +class CheckRobotHealthTask(BasicTask): + complexity = "medium" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /tf_static\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /trajectory_execution_event\ntype: std_msgs/msg/String\n", - ] - ), - MockReceiveROS2MessageTool(available_topics=["/pointcloud"]), - ] + def get_base_prompt(self) -> str: + return "Check robot health status" def get_prompt(self) -> str: - return "Get the pointcloud." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} by examining system diagnostics and monitoring data. " + "You can explore available diagnostic topics and gather information " + "about robot health, joint states, and system logs." 
+ ) -class GetObjectPositionsTask(Task): - complexity = "easy" +class AssessSensorDataQualityTask(BasicTask): + complexity = "hard" - def __init__( - self, - objects: Dict[str, List[Point]], - validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, - ) -> None: - super().__init__( - validators=validators, extra_tool_calls=extra_tool_calls, logger=logger - ) - """Task to get the positions of the objects - - Examples - -------- - objects = { - "banana": [(0.1, 0.2, 0.3), (0.4, 0.5, 0.6)], - "cube": [(0.7, 0.8, 0.9)], - } - """ - self.objects = objects - - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - ] - ), - MockGetObjectPositionsTool(mock_objects=self.objects), - ] + def get_base_prompt(self) -> str: + return "Assess sensor data quality" def get_prompt(self) -> str: - """Generates a prompt based on the objects provided in the task. If there is more than one object, the object in the prompt will be pluralized. - Returns: - str: Formatted prompt for the task - """ - inflector = inflect.engine() - object_counts = {obj: len(positions) for obj, positions in self.objects.items()} - formatted_objects = [ - inflector.plural(obj) if count > 1 else obj - for obj, count in object_counts.items() - ] - if len(formatted_objects) > 1: - objects_list = ( - ", ".join(formatted_objects[:-1]) + f", and {formatted_objects[-1]}" - ) + if self.prompt_detail == "brief": + return self.get_base_prompt() else: - objects_list = formatted_objects[0] - return f"Get the {objects_list} positions." + return ( + f"{self.get_base_prompt()} across all available sensors in the robot system. " + "You can explore sensor topics and gather data from various sources " + "including laser scans, cameras, pointclouds, and odometry to evaluate " + "overall sensor performance." 
+ ) diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/custom_interfaces.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/custom_interfaces.py index 2d9e36360..0cff01ff7 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/custom_interfaces.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/custom_interfaces.py @@ -14,16 +14,13 @@ import logging from abc import ABC -from typing import Any, Dict, List, Type +from typing import Any, List from langchain_core.tools import BaseTool -from pydantic import BaseModel from rai.types import ( BoundingBox2D, - CameraInfo, Detection2D, Header, - Image, Point, Pose, Pose2D, @@ -32,20 +29,21 @@ Time, ) from rai.types.rai_interfaces import ( - ManipulatorMoveToRequest, RAIDetectionArray, - RAIGroundedSamRequest, - RAIGroundingDinoRequest, ) -from rai_bench.tool_calling_agent.interfaces import Task, Validator -from rai_bench.tool_calling_agent.messages.base import Clock -from rai_bench.tool_calling_agent.messages.services import ( - StringListRequest, - VectorStoreRetrievalRequest, - WhatISeeRequest, +from rai_bench.tool_calling_agent.interfaces import Task, TaskArgs, Validator +from rai_bench.tool_calling_agent.mocked_ros2_interfaces import ( + COMMON_INTERFACES, + COMMON_SERVICES_AND_TYPES, + COMMON_TOPIC_MODELS, + COMMON_TOPICS_AND_TYPES, + CUSTOM_INTERFACES, + CUSTOM_SERVICE_MODELS, + CUSTOM_SERVICES_AND_TYPES, + CUSTOM_TOPIC_MODELS, + CUSTOM_TOPICS_AND_TYPES, ) -from rai_bench.tool_calling_agent.messages.topics import AudioMessage, HRIMessage from rai_bench.tool_calling_agent.mocked_tools import ( MockCallROS2ServiceTool, MockGetROS2MessageInterfaceTool, @@ -55,764 +53,17 @@ ) loggers_type = logging.Logger +INTERFACES = COMMON_INTERFACES | CUSTOM_INTERFACES -# dict of interfaces where keys are interfaces types and values are output -# of GetROS2MessageInterfaceTool which are same as ros2 interface show outputs -# the dict contains custom as well as couple other common interfaces -MOCK_INTERFACES: Dict[str, str] = { - "sensor_msgs/msg/CameraInfo": """ -# This message defines meta information for a camera. It should be in a -# camera namespace on topic "camera_info" and accompanied by up to five -# image topics named: -# -# image_raw - raw data from the camera driver, possibly Bayer encoded -# image - monochrome, distorted -# image_color - color, distorted -# image_rect - monochrome, rectified -# image_rect_color - color, rectified -# -# The image_pipeline contains packages (image_proc, stereo_image_proc) -# for producing the four processed image topics from image_raw and -# camera_info. The meaning of the camera parameters are described in -# detail at http://www.ros.org/wiki/image_pipeline/CameraInfo. -# -# The image_geometry package provides a user-friendly interface to -# common operations using this meta information. If you want to, e.g., -# project a 3d point into image coordinates, we strongly recommend -# using image_geometry. -# -# If the camera is uncalibrated, the matrices D, K, R, P should be left -# zeroed out. In particular, clients may assume that K[0] == 0.0 -# indicates an uncalibrated camera. 
- -####################################################################### -# Image acquisition info # -####################################################################### - -# Time of image acquisition, camera coordinate frame ID -std_msgs/Header header # Header timestamp should be acquisition time of image - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of camera - # +x should point to the right in the image - # +y should point down in the image - # +z should point into the plane of the image - - -####################################################################### -# Calibration Parameters # -####################################################################### -# These are fixed during camera calibration. Their values will be the # -# same in all messages until the camera is recalibrated. Note that # -# self-calibrating systems may "recalibrate" frequently. # -# # -# The internal parameters can be used to warp a raw (distorted) image # -# to: # -# 1. An undistorted image (requires D and K) # -# 2. A rectified image (requires D, K, R) # -# The projection matrix P projects 3D points into the rectified image.# -####################################################################### - -# The image dimensions with which the camera was calibrated. -# Normally this will be the full camera resolution in pixels. -uint32 height -uint32 width - -# The distortion model used. Supported models are listed in -# sensor_msgs/distortion_models.hpp. For most cameras, "plumb_bob" - a -# simple model of radial and tangential distortion - is sufficent. -string distortion_model - -# The distortion parameters, size depending on the distortion model. -# For "plumb_bob", the 5 parameters are: (k1, k2, t1, t2, k3). -float64[] d - -# Intrinsic camera matrix for the raw (distorted) images. -# [fx 0 cx] -# K = [ 0 fy cy] -# [ 0 0 1] -# Projects 3D points in the camera coordinate frame to 2D pixel -# coordinates using the focal lengths (fx, fy) and principal point -# (cx, cy). -float64[9] k # 3x3 row-major matrix - -# Rectification matrix (stereo cameras only) -# A rotation matrix aligning the camera coordinate system to the ideal -# stereo image plane so that epipolar lines in both stereo images are -# parallel. -float64[9] r # 3x3 row-major matrix - -# Projection/camera matrix -# [fx' 0 cx' Tx] -# P = [ 0 fy' cy' Ty] -# [ 0 0 1 0] -# By convention, this matrix specifies the intrinsic (camera) matrix -# of the processed (rectified) image. That is, the left 3x3 portion -# is the normal camera intrinsic matrix for the rectified image. -# It projects 3D points in the camera coordinate frame to 2D pixel -# coordinates using the focal lengths (fx', fy') and principal point -# (cx', cy') - these may differ from the values in K. -# For monocular cameras, Tx = Ty = 0. Normally, monocular cameras will -# also have R = the identity and P[1:3,1:3] = K. -# For a stereo pair, the fourth column [Tx Ty 0]' is related to the -# position of the optical center of the second camera in the first -# camera's frame. We assume Tz = 0 so both cameras are in the same -# stereo image plane. The first camera always has Tx = Ty = 0. For -# the right (second) camera of a horizontal stereo pair, Ty = 0 and -# Tx = -fx' * B, where B is the baseline between the cameras. 
-# Given a 3D point [X Y Z]', the projection (x, y) of the point onto -# the rectified image is given by: -# [u v w]' = P * [X Y Z 1]' -# x = u / w -# y = v / w -# This holds for both images of a stereo pair. -float64[12] p # 3x4 row-major matrix - - -####################################################################### -# Operational Parameters # -####################################################################### -# These define the image region actually captured by the camera # -# driver. Although they affect the geometry of the output image, they # -# may be changed freely without recalibrating the camera. # -####################################################################### - -# Binning refers here to any camera setting which combines rectangular -# neighborhoods of pixels into larger "super-pixels." It reduces the -# resolution of the output image to -# (width / binning_x) x (height / binning_y). -# The default values binning_x = binning_y = 0 is considered the same -# as binning_x = binning_y = 1 (no subsampling). -uint32 binning_x -uint32 binning_y - -# Region of interest (subwindow of full camera resolution), given in -# full resolution (unbinned) image coordinates. A particular ROI -# always denotes the same window of pixels on the camera sensor, -# regardless of binning settings. -# The default setting of roi (all values 0) is considered the same as -# full resolution (roi.width = width, roi.height = height). -RegionOfInterest roi - # - uint32 x_offset # - # (0 if the ROI includes the left edge of the image) - uint32 y_offset # - # (0 if the ROI includes the top edge of the image) - uint32 height # - uint32 width # - bool do_rectify -""", - "sensor_msgs/msg/Image": """ -# This message contains an uncompressed image -# (0, 0) is at top-left corner of image - -std_msgs/Header header # Header timestamp should be acquisition time of image - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - -uint32 height # image height, that is, number of rows -uint32 width # image width, that is, number of columns - -# The legal values for encoding are in file src/image_encodings.cpp -# If you want to standardize a new string format, join -# ros-users@lists.ros.org and send an email proposing a new encoding. - -string encoding # Encoding of pixels -- channel meaning, ordering, size - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - -uint8 is_bigendian # is this data bigendian? -uint32 step # Full row length in bytes -uint8[] data # actual matrix data, size is (step * rows) -""", - "rosgraph_msgs/msg/Clock": """ -# This message communicates the current time. -# -# For more information, see https://design.ros2.org/articles/clock_and_time.html. -builtin_interfaces/Time clock - int32 sec - uint32 nanosec -""", - "rai_interfaces/msg/HRIMessage": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id -string text -sensor_msgs/Image[] images - std_msgs/Header header # - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - uint32 height # - uint32 width # - string encoding # - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - uint8 is_bigendian # - uint32 step # - uint8[] data # -rai_interfaces/AudioMessage[] audios - # - # - # - # - # - int16[] audio - uint16 sample_rate - uint16 channels -string communication_id -int64 seq_no -bool seq_end -""", - "rai_interfaces/msg/AudioMessage": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -int16[] audio -uint16 sample_rate -uint16 channels -""", - "rai_interfaces/msg/RAIDetectionArray": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# A list of 2D detections, for a multi-object 2D detector. -std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - -# A list of the detected proposals. A multi-proposal detector might generate -# this list with many candidate detections generated from a single input. 
-vision_msgs/Detection2D[] detections - # - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - ObjectHypothesisWithPose[] results - ObjectHypothesis hypothesis - string class_id - float64 score - geometry_msgs/PoseWithCovariance pose - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 - float64[36] covariance - BoundingBox2D bbox - vision_msgs/Pose2D center - float64 x - float64 y - float64 theta - float64 size_x - float64 size_y - string id -# a list of classes being detected -string[] detection_classes -""", - "rai_interfaces/srv/ManipulatorMoveTo": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A simplified approach with binary states for the gripper -bool initial_gripper_state -bool final_gripper_state -geometry_msgs/PoseStamped target_pose - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 ---- -bool success -""", - "rai_interfaces/srv/RAIGroundedSam": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -RAIDetectionArray detections - # - # - # - # - # - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - vision_msgs/Detection2D[] detections - # - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - ObjectHypothesisWithPose[] results - ObjectHypothesis hypothesis - string class_id - float64 score - geometry_msgs/PoseWithCovariance pose - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 - float64[36] covariance - BoundingBox2D bbox - vision_msgs/Pose2D center - float64 x - float64 y - float64 theta - float64 size_x - float64 size_y - string id - string[] detection_classes -sensor_msgs/Image source_img - std_msgs/Header header # - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - uint32 height # - uint32 width # - string encoding # - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - uint8 is_bigendian # - uint32 step # - uint8[] data # ---- -sensor_msgs/Image[] masks - std_msgs/Header header # - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - uint32 height # - uint32 width # - string encoding # - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - uint8 is_bigendian # - uint32 step # - uint8[] data # -""", - "rai_interfaces/srv/RAIGroundingDino": """ -# -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -string classes -float64 box_threshold -float64 text_threshold -sensor_msgs/Image source_img - std_msgs/Header header # - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - uint32 height # - uint32 width # - string encoding # - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - uint8 is_bigendian # - uint32 step # - uint8[] data # ---- -RAIDetectionArray detections - # - # - # - # - # - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - vision_msgs/Detection2D[] detections - # - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - ObjectHypothesisWithPose[] results - ObjectHypothesis hypothesis - string class_id - float64 score - geometry_msgs/PoseWithCovariance pose - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 - float64[36] covariance - BoundingBox2D bbox - vision_msgs/Pose2D center - float64 x - float64 y - float64 theta - float64 size_x - float64 size_y - string id - string[] detection_classes -""", - "rai_interfaces/srv/StringList": """ -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Request - empty ---- -# Response -bool success -string[] string_list -""", - "rai_interfaces/srv/VectorStoreRetrieval": """ -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# +TOPICS_AND_TYPES = COMMON_TOPICS_AND_TYPES | CUSTOM_TOPICS_AND_TYPES +TOPIC_MODELS = COMMON_TOPIC_MODELS | CUSTOM_TOPIC_MODELS -# Request -string query - ---- -# Response -bool success -string message -string[] documents -float32[] scores -""", - "rai_interfaces/srv/WhatISee": """z -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Request (empty) - ---- -# Response, timed with image timestamp -string[] observations -string perception_source -sensor_msgs/Image image - std_msgs/Header header # - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - # Header frame_id should be optical frame of camera - # origin of frame should be optical center of cameara - # +x should point to the right in the image - # +y should point down in the image - # +z should point into to plane of the image - # If the frame_id here and the frame_id of the CameraInfo - # message associated with the image conflict - # the behavior is undefined - uint32 height # - uint32 width # - string encoding # - # taken from the list of strings in include/sensor_msgs/image_encodings.hpp - uint8 is_bigendian # - uint32 step # - uint8[] data # -geometry_msgs/Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -""", - "rai_interfaces/action/Task": """ -# Goal -string task -string description -string priority - ---- -# Result -bool success -string report - ---- -# Feedback -string current_status -""", - "/load_map": """ -string filename ---- -bool success -""", - "/query_planner_interface": """ ---- - -# The planning instances that could be used in the benchmark -PlannerInterfaceDescription[] planner_interfaces - string name - string pipeline_id - string[] planner_ids - -""", -} - - -SERVICES_AND_TYPES = { - # sample interfaces - # "/load_map": "moveit_msgs/srv/LoadMap", - # "/query_planner_interface": "moveit_msgs/srv/QueryPlannerInterfaces", - # custom interfaces - "/manipulator_move_to": "rai_interfaces/srv/ManipulatorMoveTo", - "/grounded_sam_segment": "rai_interfaces/srv/RAIGroundedSam", - "/grounding_dino_classify": "rai_interfaces/srv/RAIGroundingDino", - "/get_log_digest": "rai_interfaces/srv/StringList", - "/rai_whoami_documentation_service": "rai_interfaces/srv/VectorStoreRetrieval", - "/rai/whatisee/get": "rai_interfaces/srv/WhatISee", -} - -SERVICE_MODELS: Dict[str, Type[BaseModel]] = { - "rai_interfaces/srv/ManipulatorMoveTo": ManipulatorMoveToRequest, - "rai_interfaces/srv/RAIGroundedSam": RAIGroundedSamRequest, - "rai_interfaces/srv/RAIGroundingDino": RAIGroundingDinoRequest, - "rai_interfaces/srv/StringList": StringListRequest, - "rai_interfaces/srv/VectorStoreRetrieval": VectorStoreRetrievalRequest, - "rai_interfaces/srv/WhatISee": WhatISeeRequest, -} - -TOPICS_AND_TYPES: Dict[str, str] = { - # sample topics - "/camera_image_color": "sensor_msgs/msg/Image", - "/camera_image_depth": "sensor_msgs/msg/Image", - "/clock": "rosgraph_msgs/msg/Clock", - "/color_camera_info": "sensor_msgs/msg/CameraInfo", - "/color_camera_info5": "sensor_msgs/msg/CameraInfo", - "/depth_camera_info5": "sensor_msgs/msg/CameraInfo", - "/depth_image5": "sensor_msgs/msg/Image", - # custom topics - "/to_human": "rai_interfaces/msg/HRIMessage", - "/send_audio": "rai_interfaces/msg/AudioMessage", - "/send_detections": "rai_interfaces/msg/RAIDetectionArray", -} - -ACTIONS_AND_TYPES = { - # custom actions - "/perform_task": 
"rai_interfaces/action/Task", - # some sample actions - # "/execute_trajectory": "moveit_msgs/action/ExecuteTrajectory", - # "/move_action": "moveit_msgs/action/MoveGroup", - # "/follow_joint_trajectory": "control_msgs/action/FollowJointTrajectory", - # "/gripper_cmd": "control_msgs/action/GripperCommand", -} +SERVICES_AND_TYPES = COMMON_SERVICES_AND_TYPES | CUSTOM_SERVICES_AND_TYPES TOPIC_STRINGS = [ f"topic: {topic}\ntype: {msg_type}\n" for topic, msg_type in TOPICS_AND_TYPES.items() ] -TOPIC_MODELS: Dict[str, Type[BaseModel]] = { - "sensor_msgs/msg/CameraInfo": CameraInfo, - "sensor_msgs/msg/Image": Image, - "rosgraph_msgs/msg/Clock": Clock, - "rai_interfaces/msg/HRIMessage": HRIMessage, - "rai_interfaces/msg/AudioMessage": AudioMessage, - "rai_interfaces/msg/RAIDetectionArray": RAIDetectionArray, -} - -IMAGE_TOPICS: Dict[str, str] = { - "/attached_collision_object": "moveit_msgs/msg/AttachedCollisionObject", - "/camera_image_color": "sensor_msgs/msg/Image", - "/camera_image_depth": "sensor_msgs/msg/Image", - "/clock": "rosgraph_msgs/msg/Clock", - "/collision_object": "moveit_msgs/msg/CollisionObject", - "/color_camera_info": "sensor_msgs/msg/CameraInfo", - "/color_camera_info5": "sensor_msgs/msg/CameraInfo", - "/depth_camera_info5": "sensor_msgs/msg/CameraInfo", -} + SERVICE_STRINGS = [ f"service: {service}\ntype: {msg_type}\n" @@ -820,34 +71,49 @@ ] -PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT = """You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. -Be proactive and use the tools to answer questions. +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT = """You are a ROS 2 expert that want to solve tasks. You have access to various tools that allow you to query the ROS 2 system. +Be proactive and use the tools to answer questions.""" + +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + + """ Example of tool calls: - get_ros2_message_interface, args: {'msg_type': 'geometry_msgs/msg/Twist'} -- publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}} -- get_ros2_message_interface, args: {'msg_type': 'turtlesim/srv/TeleportAbsolute'} -- publish_ros2_message, args: {'topic': '/turtle1/teleport_absolute', 'message_type': 'turtlesim/srv/TeleportAbsolute', 'message': {x: 5.0, y: 2.0, theta: 1.57}}""" +- publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}}""" +) + +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + + """ +- get_ros2_topics_names_and_types, args: {} +- get_ros2_message_interface, args: {'msg_type': 'rai_interfaces/msg/HRIMessage'} +- call_ros2_service, args: {'service': '/grounding_dino_classify', 'service_type': 'rai_interfaces/srv/RAIGroundingDino', 'request': {'classes': 'bottle, book', 'box_threshold': 0.4, 'text_threshold': 0.25}}""" +) class CustomInterfaceTask(Task, ABC): + type = "custom_interface" + @property - def type(self) -> str: - return "custom_interface" + def optional_tool_calls_number(self) -> int: + # list topics + # get interface is not optional + return 1 + + def get_system_prompt(self) -> str: + if self.n_shots == 0: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + elif self.n_shots == 2: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + else: + return 
PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT class CustomInterfacesTopicTask(CustomInterfaceTask, ABC): def __init__( - self, - topic: str, - validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + self, topic: str, validators: List[Validator], task_args: TaskArgs ) -> None: - super().__init__( - validators=validators, - extra_tool_calls=extra_tool_calls, - logger=logger, - ) + super().__init__(validators=validators, task_args=task_args) self.topic = topic @property @@ -856,7 +122,7 @@ def available_tools(self) -> List[BaseTool]: MockGetROS2TopicsNamesAndTypesTool( mock_topics_names_and_types=TOPIC_STRINGS ), - MockGetROS2MessageInterfaceTool(mock_interfaces=MOCK_INTERFACES), + MockGetROS2MessageInterfaceTool(mock_interfaces=INTERFACES), MockPublishROS2MessageTool( available_topics=list(TOPICS_AND_TYPES.keys()), available_message_types=list(TOPICS_AND_TYPES.values()), @@ -864,9 +130,6 @@ def available_tools(self) -> List[BaseTool]: ), ] - def get_system_prompt(self) -> str: - return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT - class CustomInterfacesServiceTask(CustomInterfaceTask, ABC): def __init__( @@ -874,14 +137,9 @@ def __init__( service: str, service_args: dict[str, Any], validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + task_args: TaskArgs, ) -> None: - super().__init__( - validators=validators, - extra_tool_calls=extra_tool_calls, - logger=logger, - ) + super().__init__(validators=validators, task_args=task_args) self.service = service self.service_args = service_args @@ -891,211 +149,337 @@ def available_tools(self) -> List[BaseTool]: MockGetROS2ServicesNamesAndTypesTool( mock_service_names_and_types=SERVICE_STRINGS ), - MockGetROS2MessageInterfaceTool(mock_interfaces=MOCK_INTERFACES), + MockGetROS2MessageInterfaceTool(mock_interfaces=INTERFACES), MockCallROS2ServiceTool( available_services=list(SERVICES_AND_TYPES.keys()), available_service_types=list(SERVICES_AND_TYPES.values()), - available_service_models=SERVICE_MODELS, + available_service_models=CUSTOM_SERVICE_MODELS, ), ] -# TODO (jm) add actions Tasks - - -# TODO (jm) should we and how to parametrize these classes? class PublishROS2HRIMessageTextTask(CustomInterfacesTopicTask): complexity = "easy" def __init__( self, topic: str, - text: str, validators: List[Validator], - extra_tool_calls: int = 0, - logger: logging.Logger | None = None, + task_args: TaskArgs, + text: str = "Hello!", ) -> None: - super().__init__(topic, validators, extra_tool_calls, logger) + super().__init__(topic, validators=validators, task_args=task_args) self.text = text + def get_base_prompt(self) -> str: + return f"Publish message to topic '{self.topic}' with text: '{self.text}'." + def get_prompt(self) -> str: - return ( - f"You need to publish a message to the topic '{self.topic}' with the text value: '{self.text}'.\n" - "Before publishing, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 topics and their message types.\n" - f"2. Find the message type for the topic '{self.topic}'.\n" - "3. Retrieve the full message interface definition for that type.\n" - "4. Construct the message filling only the fields you are instructed to. Rest of the fields will have default values.\n" - f"5. 
Publish the message to '{self.topic}' using the correct message type and interface.\n" - ) + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can discover available topics, examine the message interface " + f"structure, and publish an HRI message containing the text '{self.text}'." + ) class PublishROS2AudioMessageTask(CustomInterfacesTopicTask): complexity = "easy" - expected_audio: List[int] = [123, 456, 789] - expected_sample_rate: int = 44100 - expected_channels: int = 2 - def get_prompt(self) -> str: + def __init__( + self, + topic: str, + validators: List[Validator], + task_args: TaskArgs, + audio: List[int] = [123, 456, 789], + sample_rate: int = 44100, + channels: int = 2, + ) -> None: + super().__init__(topic, validators=validators, task_args=task_args) + self.expected_audio = audio + self.expected_sample_rate = sample_rate + self.expected_channels = channels + + def get_base_prompt(self) -> str: return ( - f"You need to publish a message to the topic '{self.topic}' with audio samples {self.expected_audio}, " - f"sample rate {self.expected_sample_rate}, and {self.expected_channels} channels.\n" - "Before publishing, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 topics and their message types.\n" - f"2. Find the message type for the topic '{self.topic}'.\n" - "3. Retrieve the full message interface definition for that type.\n" - "4. Construct the message filling only the fields you are instructed to. Rest of the fields will have default values.\n" - f"5. Publish the message to '{self.topic}' using the correct message type and interface.\n" + f"Publish audio message to topic '{self.topic}' with samples " + f"{self.expected_audio}, sample rate {self.expected_sample_rate}, " + f"channels {self.expected_channels}." ) + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can explore available audio topics, examine the message " + f"interface, and publish audio data with samples={self.expected_audio}, " + f"sample_rate={self.expected_sample_rate}, and channels={self.expected_channels}." 
+            )
+
 
 class PublishROS2DetectionArrayTask(CustomInterfacesTopicTask):
     complexity = "easy"
-    expected_detection_classes: List[str] = ["person", "car"]
-    expected_detections: List[Detection2D] = [
-        Detection2D(
-            bbox=BoundingBox2D(
-                center=Pose2D(x=320.0, y=240.0, theta=0.0),
-                size_x=50.0,
-                size_y=50.0,
+    def __init__(
+        self,
+        topic: str,
+        validators: List[Validator],
+        task_args: TaskArgs,
+        detection_classes: List[str] = ["person", "car"],
+        bbox_center_x: float = 320.0,
+        bbox_center_y: float = 240.0,
+        bbox_size_x: float = 50.0,
+        bbox_size_y: float = 50.0,
+    ) -> None:
+        super().__init__(topic, validators=validators, task_args=task_args)
+        self.expected_detection_classes = detection_classes
+        self.expected_detections = [
+            Detection2D(
+                bbox=BoundingBox2D(
+                    center=Pose2D(x=bbox_center_x, y=bbox_center_y, theta=0.0),
+                    size_x=bbox_size_x,
+                    size_y=bbox_size_y,
+                )
             )
-        )
-    ]
+        ]
 
-    def get_prompt(self) -> str:
+    def get_base_prompt(self) -> str:
+        bbox_center = self.expected_detections[0].bbox.center
+        bbox_size = self.expected_detections[0].bbox
         return (
-            f"You need to publish a detection message to the topic '{self.topic}' with one detection:\n"
-            f"{self.expected_detections[0].model_dump()} and detection classes {self.expected_detection_classes}.\n"
-            "Before publishing, follow these steps:\n"
-            "1. Use the tool to retrieve the available ROS2 topics and their message types.\n"
-            f"2. Find the message type for the topic '{self.topic}'.\n"
-            "3. Retrieve the full message interface definition for that type.\n"
-            "4. Construct the message filling only the fields you are instructed to. Rest of the fields will have default values.\n"
-            f"5. Publish the message to '{self.topic}' using the correct message type and interface.\n"
+            f"Publish detection array to topic '{self.topic}' with classes "
+            f"{self.expected_detection_classes} and bbox center "
+            f"({bbox_center.x}, {bbox_center.y}) size {bbox_size.size_x}x{bbox_size.size_y}."
         )
 
+    def get_prompt(self) -> str:
+        if self.prompt_detail == "brief":
+            return self.get_base_prompt()
+        else:
+            bbox_center = self.expected_detections[0].bbox.center
+            bbox_size = self.expected_detections[0].bbox
+            return (
+                f"{self.get_base_prompt()} "
+                "You can explore available detection topics, examine the message "
+                f"interface, and publish detection data with classes={self.expected_detection_classes} "
+                f"and bounding box at center ({bbox_center.x}, {bbox_center.y}) "
+                f"with size_x={bbox_size.size_x}, size_y={bbox_size.size_y}." 
+ ) + class CallROS2ManipulatorMoveToServiceTask(CustomInterfacesServiceTask): complexity = "easy" - expected_initial_gripper_state = True - expected_final_gripper_state = False - expected_target_pose: PoseStamped = PoseStamped( - pose=Pose( - position=Point(x=1.0, y=2.0, z=3.0), - orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), + def __init__( + self, + service: str, + service_args: dict[str, Any], + validators: List[Validator], + task_args: TaskArgs, + target_x: float = 1.0, + target_y: float = 2.0, + target_z: float = 3.0, + initial_gripper_state: bool = True, + final_gripper_state: bool = False, + frame_id: str = "base_link", + ) -> None: + super().__init__( + service, service_args, validators=validators, task_args=task_args + ) + self.expected_initial_gripper_state = initial_gripper_state + self.expected_final_gripper_state = final_gripper_state + self.expected_target_pose = PoseStamped( + header=Header(frame_id=frame_id), + pose=Pose( + position=Point(x=target_x, y=target_y, z=target_z), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0), + ), ) - ) - def get_prompt(self) -> str: + def get_base_prompt(self) -> str: + pos = self.expected_target_pose.pose.position return ( - f"You need to call the service '{self.service}' with a target_pose: " - f"{self.expected_target_pose.model_dump()} and gripper states (initial: {self.expected_initial_gripper_state}, final: {self.expected_final_gripper_state}).\n" - "Before calling, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 services and their types.\n" - f"2. Find the service type for '{self.service}'.\n" - "3. Retrieve the full message interface definition for that service.\n" - "4. Construct the request message filling only the fields you are instructed to. Rest of the fields will have default values.\n" - f"5. Call the service '{self.service}' using the correct message type and interface.\n" + f"Call service '{self.service}' to move manipulator to pose " + f"({pos.x}, {pos.y}, {pos.z}) with initial_gripper={self.expected_initial_gripper_state}, " + f"final_gripper={self.expected_final_gripper_state}." ) + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + pos = self.expected_target_pose.pose.position + return ( + f"{self.get_base_prompt()} " + "You can discover available manipulation services, examine the service " + f"interface, and call the service with target_pose position (x={pos.x}, " + f"y={pos.y}, z={pos.z}), initial_gripper_state={self.expected_initial_gripper_state}, " + f"and final_gripper_state={self.expected_final_gripper_state}." + ) + class CallGroundedSAMSegmentTask(CustomInterfacesServiceTask): complexity = "easy" - expected_detections: RAIDetectionArray = RAIDetectionArray( - header=Header(stamp=Time(sec=0, nanosec=0), frame_id="camera_frame"), - detections=[], - ) + def __init__( + self, + service: str, + service_args: dict[str, Any], + validators: List[Validator], + task_args: TaskArgs, + frame_id: str = "camera_frame", + ) -> None: + super().__init__( + service, service_args, validators=validators, task_args=task_args + ) + self.expected_detections = RAIDetectionArray( + header=Header(stamp=Time(sec=0, nanosec=0), frame_id=frame_id), + detections=[], + ) + + def get_base_prompt(self) -> str: + return f"Call service '{self.service}' for image segmentation." 
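+    # Expected flow (sketch): the agent may first list services, inspect the
+    # rai_interfaces/srv/RAIGroundedSam interface, then call the segmentation
+    # service with an empty detections array whose header carries
+    # frame_id='camera_frame' (the default above).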
def get_prompt(self) -> str: - return ( - f"You need to call the service '{self.service}' with detections: {self.expected_detections.model_dump()}\n" - "Before calling, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 services and their types.\n" - f"2. Find the service type for '{self.service}'.\n" - "3. Retrieve the full message interface definition for that service.\n" - "4. Construct the request message filling only the fields you are instructed to. Rest of the fields will have default values.\n" - f"5. Call the service '{self.service}' using the correct message type and interface.\n" - ) + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + frame_id = self.expected_detections.header.frame_id + return ( + f"{self.get_base_prompt()} " + "You can discover available AI vision services, examine the service " + f"interface, and call the segmentation service with detections array " + f"(empty detections, header frame_id='{frame_id}') and source image." + ) class CallGroundingDinoClassify(CustomInterfacesServiceTask): complexity = "easy" - expected_classes: str = "bottle, book, chair" - expected_box_threshold: float = 0.4 - expected_text_threshold: float = 0.25 + def __init__( + self, + service: str, + service_args: dict[str, Any], + validators: List[Validator], + task_args: TaskArgs, + classes: str = "bottle, book, chair", + box_threshold: float = 0.4, + text_threshold: float = 0.25, + ) -> None: + super().__init__( + service, service_args, validators=validators, task_args=task_args + ) + self.expected_classes = classes + self.expected_box_threshold = box_threshold + self.expected_text_threshold = text_threshold - def get_prompt(self) -> str: + def get_base_prompt(self) -> str: return ( - f"You need to call the service '{self.service}' with classes: '{self.expected_classes}', " - f"box_threshold: {self.expected_box_threshold}, text_threshold: {self.expected_text_threshold}, " - "Before calling, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 services and their types.\n" - f"2. Find the service type for '{self.service}'.\n" - "3. Retrieve the full message interface definition for that service.\n" - "4. Construct the request message filling only the fields you are instructed to. Rest of the fields will have default values.\n" - f"5. Call the service '{self.service}' using the correct message type and interface.\n" + f"Call service '{self.service}' for object classification with classes " + f"'{self.expected_classes}', box_threshold {self.expected_box_threshold}, " + f"text_threshold {self.expected_text_threshold}." ) + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can discover available AI detection services, examine the service " + f"interface, and call the classification service with classes='{self.expected_classes}', " + f"box_threshold={self.expected_box_threshold}, and text_threshold={self.expected_text_threshold}." + ) + class CallGetLogDigestTask(CustomInterfacesServiceTask): complexity = "easy" - def get_prompt(self) -> str: - return ( - f"You need to call the service '{self.service}' with an empty request.\n" - "Before calling, follow these steps:\n" - "1. Use the tool to retrieve the available ROS2 services and their types.\n" - f"2. Find the service type for '{self.service}'.\n" - "3. Retrieve the full message interface definition for that service.\n" - "4. 
Construct the message filling only the fields you are instructed to. Rest of the fields will have default values.\n"
-            f"5. Call the service '{self.service}' using the correct message type and interface.\n"
+    def __init__(
+        self,
+        service: str,
+        service_args: dict[str, Any],
+        validators: List[Validator],
+        task_args: TaskArgs,
+    ) -> None:
+        super().__init__(
+            service, service_args, validators=validators, task_args=task_args
         )
+
+    def get_base_prompt(self) -> str:
+        return f"Call service '{self.service}' to get log digest."
+
+    def get_prompt(self) -> str:
+        if self.prompt_detail == "brief":
+            return self.get_base_prompt()
+        else:
+            return (
+                f"{self.get_base_prompt()} "
+                "You can discover available logging services, examine the service "
+                "interface, and call the service with an empty request to retrieve "
+                "system log information."
+            )
+

 class CallVectorStoreRetrievalTask(CustomInterfacesServiceTask):
     complexity = "easy"

-    expected_query: str = "What is the purpose of this robot?"

-    def get_prompt(self) -> str:
-        return (
-            f"You need to call the service '{self.service}' with the query: '{self.expected_query}'.\n"
-            "Before calling, follow these steps:\n"
-            "1. Use the tool to retrieve the available ROS2 services and their types.\n"
-            f"2. Find the service type for '{self.service}'.\n"
-            "3. Retrieve the full message interface definition for that service.\n"
-            "4. Construct the request message filling only the fields you are instructed to. Rest of the fields will have default values.\n"
-            f"5. Call the service '{self.service}' using the correct message type and interface.\n"
+    def __init__(
+        self,
+        service: str,
+        service_args: dict[str, Any],
+        validators: List[Validator],
+        task_args: TaskArgs,
+        query: str = "What is the purpose of this robot?",
+    ) -> None:
+        super().__init__(
+            service, service_args, validators=validators, task_args=task_args
         )
+        self.expected_query = query
+
+    def get_base_prompt(self) -> str:
+        return f"Call service '{self.service}' with query '{self.expected_query}'."
+
+    def get_prompt(self) -> str:
+        if self.prompt_detail == "brief":
+            return self.get_base_prompt()
+        else:
+            return (
+                f"{self.get_base_prompt()} "
+                "You can discover available knowledge services, examine the service "
+                f"interface, and call the retrieval service with query='{self.expected_query}' "
+                "to search the robot's knowledge base."
+            )


 class CallWhatISeeTask(CustomInterfacesServiceTask):
     complexity = "easy"

-    expected_observations: List[str] = ["table", "cup", "notebook"]
-    expected_perception_source: str = "front_camera"
-
-    expected_image: Image = Image(
-        header=Header(frame_id="camera_frame"),
-        height=480,
-        width=640,
-    )
+    def __init__(
+        self,
+        service: str,
+        service_args: dict[str, Any],
+        validators: List[Validator],
+        task_args: TaskArgs,
+    ) -> None:
+        super().__init__(
+            service, service_args, validators=validators, task_args=task_args
+        )

-    expected_pose: Pose = Pose(
-        position=Point(x=1.0, y=2.0, z=0.5),
-        orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0),
-    )
+    def get_base_prompt(self) -> str:
+        return f"Call service '{self.service}' to get visual observations."

     def get_prompt(self) -> str:
-        return (
-            f"You need to call the service '{self.service}' with an empty request.\n"
-            "Before calling, follow these steps:\n"
-            "1. Use the tool to retrieve the available ROS2 services and their types.\n"
-            f"2. Find the service type for '{self.service}'.\n"
-            "3. Retrieve the full message interface definition for that service.\n"
-            "4. 
Construct the request message filling only the fields you are instructed to. Rest of the fields will have default values.\n"
-            f"5. Call the service '{self.service}' using the correct message type and interface.\n"
-        )
+        if self.prompt_detail == "brief":
+            return self.get_base_prompt()
+        else:
+            return (
+                f"{self.get_base_prompt()} "
+                "You can discover available vision services, examine the service "
+                "interface, and call the service with an empty request to get "
+                "visual observations and camera pose information."
+            )
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/manipulation.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/manipulation.py
index 1cf65fa37..bf6bea985 100644
--- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/manipulation.py
+++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/manipulation.py
@@ -12,25 +12,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 from abc import ABC, abstractmethod
-from typing import Dict, List
+from typing import Any, Dict, List

 import inflect
 from langchain_core.tools import BaseTool
 from rai.tools.ros2 import MoveToPointToolInput
 from rai.types import Point

-from rai_bench.tool_calling_agent.interfaces import Task, Validator
+from rai_bench.tool_calling_agent.interfaces import Task, TaskArgs, Validator
+from rai_bench.tool_calling_agent.mocked_ros2_interfaces import (
+    COMMON_INTERFACES,
+    COMMON_SERVICES_AND_TYPES,
+    COMMON_TOPICS_AND_TYPES,
+    MANIPULATION_ACTIONS_AND_TYPES,
+    MANIPULATION_INTERFACES,
+    MANIPULATION_SERVICES_AND_TYPES,
+    MANIPULATION_TOPICS_AND_TYPES,
+)
 from rai_bench.tool_calling_agent.mocked_tools import (
     MockGetObjectPositionsTool,
+    MockGetROS2MessageInterfaceTool,
+    MockGetROS2ServicesNamesAndTypesTool,
     MockGetROS2TopicsNamesAndTypesTool,
     MockMoveToPointTool,
 )

-loggers_type = logging.Logger
+INTERFACES = COMMON_INTERFACES | MANIPULATION_INTERFACES
+TOPICS_AND_TYPES = COMMON_TOPICS_AND_TYPES | MANIPULATION_TOPICS_AND_TYPES
+SERVICES_AND_TYPES = COMMON_SERVICES_AND_TYPES | MANIPULATION_SERVICES_AND_TYPES
+
+TOPIC_STRINGS = [
+    f"topic: {topic}\ntype: {topic_type}\n"
+    for topic, topic_type in TOPICS_AND_TYPES.items()
+]
+
+ACTION_STRINGS = [
+    f"action: {action}\ntype: {act_type}\n"
+    for action, act_type in MANIPULATION_ACTIONS_AND_TYPES.items()
+]
+
+SERVICE_STRINGS = [
+    f"service: {service}\ntype: {srv_type}\n"
+    for service, srv_type in SERVICES_AND_TYPES.items()
+]

-PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT = """
+PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT = """
 You are a robotic arm with interfaces to detect and manipulate objects.
 Here are the coordinates information:
 x - front to back (positive is forward)
@@ -38,6 +65,23 @@ z - up to down (positive is up).
""" +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + + """ +Example of tool calls: +- get_object_positions, args: {} +- move_to_point, args: {'x': 0.5, 'y': 0.2, 'z': 0.3, 'task': 'grab'}""" +) + +PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT = ( + PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + + """ +- move_to_point, args: {'x': 1.7, 'y': 1.8, 'z': 1.9, 'task': 'drop'} +- move_to_point, args: {'x': 0.1, 'y': -0.2, 'z': 0.1, 'task': 'grab'} +- move_to_point, args: {'x': 0.7, 'y': 0.8, 'z': 0.9, 'task': 'drop'} +""" +) + class TaskParametrizationError(Exception): """Exception raised when the task parameters are not valid.""" @@ -46,48 +90,28 @@ class TaskParametrizationError(Exception): class ManipulationTask(Task, ABC): - @property - def type(self) -> str: - return "manipulation" - - def get_system_prompt(self) -> str: - return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT - + type = "manipulation" -class GrabTask(ManipulationTask, ABC): def __init__( self, objects: Dict[str, List[Point]], - object_to_grab: str, validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + task_args: TaskArgs, + **kwargs: Any, ) -> None: - super().__init__( - validators=validators, - extra_tool_calls=extra_tool_calls, - logger=logger, - ) + super().__init__(validators=validators, task_args=task_args, **kwargs) self.objects = objects - self.object_to_grab = object_to_grab self._verify_args() - @abstractmethod - def _verify_args(self) -> None: - pass + @property + def optional_tool_calls_number(self) -> int: + return 0 @property def available_tools(self) -> List[BaseTool]: return [ MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /camera_image_color\ntype: sensor_msgs/msg/Image\n", - "topic: /camera_image_depth\ntype: sensor_msgs/msg/Image\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - "topic: /color_camera_info\ntype: sensor_msgs/msg/CameraInfo\n", - ] + mock_topics_names_and_types=TOPIC_STRINGS ), MockGetObjectPositionsTool( target_frame="panda_link0", @@ -98,41 +122,76 @@ def available_tools(self) -> List[BaseTool]: mock_objects=self.objects, ), MockMoveToPointTool(manipulator_frame="panda_link0"), + MockGetROS2ServicesNamesAndTypesTool( + mock_service_names_and_types=SERVICE_STRINGS + ), + MockGetROS2MessageInterfaceTool(mock_interfaces=INTERFACES), ] + def _verify_args(self) -> None: + pass + + def get_system_prompt(self) -> str: + if self.n_shots == 0: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_0_SHOT + elif self.n_shots == 2: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_2_SHOT + else: + return PROACTIVE_ROS2_EXPERT_SYSTEM_PROMPT_5_SHOT + + +class GrabTask(ManipulationTask, ABC): + def __init__( + self, + objects: Dict[str, List[Point]], + object_to_grab: str, + validators: List[Validator], + task_args: TaskArgs, + **kwargs: Any, + ) -> None: + super().__init__( + validators=validators, objects=objects, task_args=task_args, **kwargs + ) + self.object_to_grab = object_to_grab + self._verify_args() + + @abstractmethod + def _verify_args(self) -> None: + pass + class MoveToPointTask(ManipulationTask): complexity = "easy" def __init__( self, + objects: Dict[str, List[Point]], move_to_tool_input: MoveToPointToolInput, validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + task_args: TaskArgs, + 
**kwargs: Any, ) -> None: super().__init__( - validators=validators, extra_tool_calls=extra_tool_calls, logger=logger + validators=validators, objects=objects, task_args=task_args, **kwargs ) - self.move_to_tool_input = move_to_tool_input - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - ] - ), - MockMoveToPointTool(manipulator_frame="base_link"), - ] + def get_base_prompt(self) -> str: + return ( + f"Move the arm to point x={self.move_to_tool_input.x}, " + f"y={self.move_to_tool_input.y}, z={self.move_to_tool_input.z} " + f"to {self.move_to_tool_input.task} an object." + ) def get_prompt(self) -> str: - return f"Move the arm to a point x={self.move_to_tool_input.x}, y={self.move_to_tool_input.y}, z={self.move_to_tool_input.z} to {self.move_to_tool_input.task} an object." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can control the arm movement to the specified coordinates " + f"and perform the {self.move_to_tool_input.task} action at that location." + ) class GetObjectPositionsTask(ManipulationTask): @@ -142,42 +201,15 @@ def __init__( self, objects: Dict[str, List[Point]], validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + task_args: TaskArgs, + **kwargs: Any, ) -> None: super().__init__( - validators=validators, extra_tool_calls=extra_tool_calls, logger=logger + validators=validators, objects=objects, task_args=task_args, **kwargs ) - """Task to get the positions of the objects - - Examples - -------- - objects = { - "banana": [(0.1, 0.2, 0.3), (0.4, 0.5, 0.6)], - "cube": [(0.7, 0.8, 0.9)], - } - """ self.objects = objects - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /pointcloud\ntype: sensor_msgs/msg/PointCloud2\n", - "topic: /robot_description\ntype: std_msgs/msg/String\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /tf\ntype: tf2_msgs/msg/TFMessage\n", - ] - ), - MockGetObjectPositionsTool(mock_objects=self.objects), - ] - - def get_prompt(self) -> str: - """Generates a prompt based on the objects provided in the task. If there is more than one object, the object in the prompt will be pluralized. - Returns: - str: Formatted prompt for the task - """ + def get_base_prompt(self) -> str: inflector = inflect.engine() object_counts = {obj: len(positions) for obj, positions in self.objects.items()} formatted_objects = [ @@ -190,24 +222,35 @@ def get_prompt(self) -> str: ) else: objects_list = formatted_objects[0] + return f"Get the {objects_list} positions." + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can detect all objects and retrieve their 3D coordinates " + "for manipulation planning." + ) + class GrabExistingObjectTask(GrabTask): complexity = "medium" - """ - Task to grab an object. - Parameters - ---------- - objects : Dict[str, List[dict[str, float]]] - Dictionary of object types and their positions. - object_to_grab : str - The object to be grabbed (must have a single position). 
- """ + def get_base_prompt(self) -> str: + return f"Grab {self.object_to_grab}." def get_prompt(self) -> str: - return f"Grab {self.object_to_grab}." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate the object in the workspace and move the arm " + "to grab it at the correct coordinates." + ) def _verify_args(self): if self.object_to_grab not in self.objects: @@ -223,19 +266,19 @@ def _verify_args(self): class GrabNotExistingObjectTask(GrabTask): complexity = "medium" - """ - Task to attempt grabbing an object that does not exist. - Parameters - ---------- - objects : Dict[str, List[dict[str, float]]] - Available objects and their positions. - object_to_grab : str - Object that should not be present in the list. - """ + def get_base_prompt(self) -> str: + return f"Grab {self.object_to_grab}." def get_prompt(self) -> str: - return f"Grab {self.object_to_grab}." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can check if the object exists in the environment and " + "attempt to grab it if found." + ) def _verify_args(self): if self.object_to_grab in self.objects: @@ -245,30 +288,20 @@ def _verify_args(self): class MoveExistingObjectLeftTask(GrabTask): - """Task to move an existing object to the left. - - Parameters - ---------- - objects : Dict[str, List[dict[str, float]]] - Dictionary containing the object types and their positions. Object type should be passed as singular. - object_to_grab : str - Object type should be passed as singular. Object to be grabbed should be defined in the objects argument with only one instance (one position). - logger : loggers_type | None, optional - Logger, by default None - - Examples - -------- - objects = { - "banana": [(0.1, 0.2, 0.3), (0.4, 0.5, 0.6)], - "cube": [(0.7, 0.8, 0.9)], - } - object_to_grab = "cube" - """ + complexity = "hard" - complexity = "medium" + def get_base_prompt(self) -> str: + return f"Move {self.object_to_grab} 20 cm to the left." def get_prompt(self) -> str: - return f"Move {self.object_to_grab} 20 cm to the left." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate the object, grab it with the manipulator, " + "and move it to a position 20 cm to the left of its current location." + ) def _verify_args(self): if self.object_to_grab not in self.objects: @@ -283,22 +316,20 @@ def _verify_args(self): class MoveExistingObjectFrontTask(GrabTask): - """Task to move an existing object to the front - - Parameters - ---------- - objects : Dict[str, List[dict[str, float]]] - Dictionary containing the object types and their positions. Object type should be passed as singular. - object_to_grab : str - Object to grab. Object type should be passed as singular. Object to be grabbed should be defined in the objects argument with only one instance (one position). - logger : loggers_type | None, optional - Logger, by default None - """ + complexity = "hard" - complexity = "medium" + def get_base_prompt(self) -> str: + return f"Move {self.object_to_grab} 60 cm to the front." def get_prompt(self) -> str: - return f"Move {self.object_to_grab} 60 cm to the front." 
+ if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate the object, grab it with the manipulator, " + "and move it to a position 60 cm forward from its current location." + ) def _verify_args(self): if self.object_to_grab not in self.objects: @@ -312,71 +343,37 @@ def _verify_args(self): raise TaskParametrizationError(error_message) -class SwapObjectsTask(Task): - """Task to swap objects - - Parameters - ---------- - objects : Dict[str, List[Dict[str, float]]] - Dictionary containing the object types and their positions. Object type should be passed as singular. - objects_to_swap : List[str] - Objects to be swapped. Object type should be passed as singular. Objects to be swapped should be defined in the objects argument with only one instance (one position). - logger : loggers_type | None, optional - Logger, by default None - - Examples - -------- - objects = { - "banana": [(0.1, 0.2, 0.1)], - "cube": [(0.7, 0.8, 0.1)], - "apple": [(0.3, 0.4, 0.1), (0.5, 0.6, 0.1)], - - } - objects_to_swap = ["cube", "banana"] - """ - +class SwapObjectsTask(ManipulationTask): complexity = "hard" def __init__( self, objects: Dict[str, List[Point]], - objects_to_swap: str, + objects_to_swap: List[str], validators: List[Validator], - extra_tool_calls: int = 0, - logger: loggers_type | None = None, + task_args: TaskArgs, + **kwargs: Any, ) -> None: super().__init__( - validators=validators, - extra_tool_calls=extra_tool_calls, - logger=logger, + validators=validators, objects=objects, task_args=task_args, **kwargs ) self.objects = objects self.objects_to_swap = objects_to_swap self._verify_args() - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=[ - "topic: /attached_collision_object\ntype: moveit_msgs/msg/AttachedCollisionObject\n", - "topic: /camera_image_color\ntype: sensor_msgs/msg/Image\n", - "topic: /camera_image_depth\ntype: sensor_msgs/msg/Image\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /collision_object\ntype: moveit_msgs/msg/CollisionObject\n", - "topic: /color_camera_info\ntype: sensor_msgs/msg/CameraInfo\n", - ] - ), - MockGetObjectPositionsTool( - target_frame="panda_link0", - source_frame="RGBDCamera5", - camera_topic="/color_image5", - depth_topic="/depth_image5", - camera_info_topic="/color_camera_info5", - mock_objects=self.objects, - ), - MockMoveToPointTool(manipulator_frame="panda_link0"), - ] + def get_base_prompt(self) -> str: + return f"Swap {self.objects_to_swap[0]} and {self.objects_to_swap[1]}." + + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate both objects in the workspace, then perform a sequence " + f"of grab and move operations to swap the positions of {self.objects_to_swap[0]} " + f"and {self.objects_to_swap[1]}." + ) def _verify_args(self): for obj in self.objects_to_swap: @@ -392,6 +389,3 @@ def _verify_args(self): error_message = f"Number of requested objects to swap {len(self.objects_to_swap)} should be equal to 2." self.logger.error(msg=error_message) raise TaskParametrizationError(error_message) - - def get_prompt(self) -> str: - return f"Move {self.objects_to_swap[0]} to the initial position of {self.objects_to_swap[1]}, and move {self.objects_to_swap[1]} to the initial position of {self.objects_to_swap[0]}." 
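
The manipulation hunk above moves all per-task knobs into a single `TaskArgs` object and shares the mocked topic, service, and interface setup through the `ManipulationTask` base class. A minimal sketch of constructing one of the refactored tasks under the new signatures follows; the empty `validators` list is an illustrative assumption rather than the benchmark's canonical setup, which attaches task-specific validators:

```python
from rai.types import Point

from rai_bench.tool_calling_agent.interfaces import TaskArgs
from rai_bench.tool_calling_agent.tasks.manipulation import GrabExistingObjectTask

# Shared configuration: allow 2 corrective tool calls, use the descriptive
# prompt variant and the 2-shot system prompt.
task_args = TaskArgs(
    extra_tool_calls=2,
    prompt_detail="descriptive",
    examples_in_system_prompt=2,
)

# Mocked scene: a single cube at a known position (object types are singular).
objects = {"cube": [Point(x=0.7, y=0.8, z=0.1)]}

task = GrabExistingObjectTask(
    objects=objects,
    object_to_grab="cube",
    validators=[],  # illustrative only; real runs pass task-specific validators
    task_args=task_args,
)

print(task.get_system_prompt())  # 2-shot manipulation system prompt
print(task.get_prompt())         # descriptive variant of "Grab cube."
```

Because the prompt detail and the shot count are resolved inside `get_prompt` and `get_system_prompt`, one task definition can be evaluated under every prompt combination without changing the task class itself.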
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/navigation.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/navigation.py index 037872e04..79514d256 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/navigation.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/navigation.py @@ -12,44 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -from typing import Dict, List, Type +from typing import List from langchain_core.tools import BaseTool -from pydantic import BaseModel -from rai_open_set_vision.tools.gdino_tools import ( - DistanceMeasurement, -) from rai_bench.tool_calling_agent.interfaces import Task -from rai_bench.tool_calling_agent.messages.actions import ( - AssistedTeleopGoal, - BackUpGoal, - ComputePathThroughPosesGoal, - ComputePathToPoseGoal, - DriveOnHeadingGoal, - FollowPathGoal, - FollowWaypointsGoal, - NavigateThroughPosesGoal, - NavigateToPoseGoal, - SmoothPathGoal, - SpinGoal, - WaitGoal, +from rai_bench.tool_calling_agent.mocked_ros2_interfaces import ( + COMMON_SERVICES_AND_TYPES, + COMMON_TOPICS_AND_TYPES, + NAVIGATION_ACTION_MODELS, + NAVIGATION_ACTIONS_AND_TYPES, + NAVIGATION_INTERFACES, + NAVIGATION_SERVICES_AND_TYPES, + NAVIGATION_TOPICS_AND_TYPES, ) from rai_bench.tool_calling_agent.mocked_tools import ( MockActionsToolkit, - MockGetDistanceToObjectsTool, - MockGetROS2ActionFeedbackTool, - MockGetROS2ActionResultTool, - MockGetROS2ActionsNamesAndTypesTool, MockGetROS2MessageInterfaceTool, + MockGetROS2ServicesNamesAndTypesTool, MockGetROS2TopicsNamesAndTypesTool, - MockStartROS2ActionTool, ) -loggers_type = logging.Logger - -ROBOT_NAVIGATION_SYSTEM_PROMPT = """You are an autonomous robot connected to ros2 environment. Your main goal is to fulfill the user's requests. +ROBOT_NAVIGATION_SYSTEM_PROMPT_0_SHOT = """You are an autonomous robot connected to ros2 environment. Your main goal is to fulfill the user's requests. Do not make assumptions about the environment you are currently in. You can use ros2 topics, services and actions to operate. @@ -98,768 +82,36 @@ (0.79, 5.73, 0.0), (0.92, 1.01, 0.0) - Before starting anything, make sure to load available topics, services and actions. 
+ Before starting anything, make sure to load available topics, services and actions.""" + +ROBOT_NAVIGATION_SYSTEM_PROMPT_2_SHOT = ( + ROBOT_NAVIGATION_SYSTEM_PROMPT_0_SHOT + + """ + Example tool calls: - - get_ros2_message_interface, args: {'msg_type': 'turtlesim/srv/TeleportAbsolute'} - - publish_ros2_message, args: {'topic': '/cmd_vel', 'message_type': 'geometry_msgs/msg/Twist', 'message': {linear: {x: 0.5, y: 0.0, z: 0.0}, angular: {x: 0.0, y: 0.0, z: 1.0}}} - - start_ros2_action, args: {'action_name': '/dock', 'action_type': 'nav2_msgs/action/Dock', 'action_args': {}} - """ - -TOPICS_NAMES_AND_TYPES = [ - "topic: /assisted_teleop/_action/feedback\ntype: nav2_msgs/action/AssistedTeleop_FeedbackMessage\n", - "topic: /assisted_teleop/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /backup/_action/feedback\ntype: nav2_msgs/action/BackUp_FeedbackMessage\n", - "topic: /backup/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /behavior_server/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /behavior_tree_log\ntype: nav2_msgs/msg/BehaviorTreeLog\n", - "topic: /bond\ntype: bond/msg/Status\n", - "topic: /bt_navigator/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /camera/camera/color/camera_info\ntype: sensor_msgs/msg/CameraInfo\n", - "topic: /camera/camera/color/image_raw\ntype: sensor_msgs/msg/Image\n", - "topic: /camera/camera/depth/camera_info\ntype: sensor_msgs/msg/CameraInfo\n", - "topic: /camera/camera/depth/image_rect_raw\ntype: sensor_msgs/msg/Image\n", - "topic: /clock\ntype: rosgraph_msgs/msg/Clock\n", - "topic: /cmd_vel_nav\ntype: geometry_msgs/msg/Twist\n", - "topic: /cmd_vel_teleop\ntype: geometry_msgs/msg/Twist\n", - "topic: /compute_path_through_poses/_action/feedback\ntype: nav2_msgs/action/ComputePathThroughPoses_FeedbackMessage\n", - "topic: /compute_path_through_poses/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /compute_path_to_pose/_action/feedback\ntype: nav2_msgs/action/ComputePathToPose_FeedbackMessage\n", - "topic: /compute_path_to_pose/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /controller_server/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /diagnostics\ntype: diagnostic_msgs/msg/DiagnosticArray\n", - "topic: /drive_on_heading/_action/feedback\ntype: nav2_msgs/action/DriveOnHeading_FeedbackMessage\n", - "topic: /drive_on_heading/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /follow_path/_action/feedback\ntype: nav2_msgs/action/FollowPath_FeedbackMessage\n", - "topic: /follow_path/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /follow_waypoints/_action/feedback\ntype: nav2_msgs/action/FollowWaypoints_FeedbackMessage\n", - "topic: /follow_waypoints/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /global_costmap/costmap\ntype: nav_msgs/msg/OccupancyGrid\n", - "topic: /global_costmap/costmap_raw\ntype: nav2_msgs/msg/Costmap\n", - "topic: /global_costmap/costmap_updates\ntype: map_msgs/msg/OccupancyGridUpdate\n", - "topic: /global_costmap/footprint\ntype: geometry_msgs/msg/Polygon\n", - "topic: /global_costmap/global_costmap/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /global_costmap/published_footprint\ntype: geometry_msgs/msg/PolygonStamped\n", - "topic: /global_costmap/scan\ntype: sensor_msgs/msg/LaserScan\n", - "topic: /goal_pose\ntype: geometry_msgs/msg/PoseStamped\n", - "topic: /led_strip\ntype: 
sensor_msgs/msg/Image\n", - "topic: /local_costmap/costmap\ntype: nav_msgs/msg/OccupancyGrid\n", - "topic: /local_costmap/costmap_raw\ntype: nav2_msgs/msg/Costmap\n", - "topic: /local_costmap/costmap_updates\ntype: map_msgs/msg/OccupancyGridUpdate\n", - "topic: /local_costmap/footprint\ntype: geometry_msgs/msg/Polygon\n", - "topic: /local_costmap/local_costmap/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /local_costmap/published_footprint\ntype: geometry_msgs/msg/PolygonStamped\n", - "topic: /local_costmap/scan\ntype: sensor_msgs/msg/LaserScan\n", - "topic: /map\ntype: nav_msgs/msg/OccupancyGrid\n", - "topic: /map_metadata\ntype: nav_msgs/msg/MapMetaData\n", - "topic: /map_saver/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /navigate_through_poses/_action/feedback\ntype: nav2_msgs/action/NavigateThroughPoses_FeedbackMessage\n", - "topic: /navigate_through_poses/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /navigate_to_pose/_action/feedback\ntype: nav2_msgs/action/NavigateToPose_FeedbackMessage\n", - "topic: /navigate_to_pose/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /odom\ntype: nav_msgs/msg/Odometry\n", - "topic: /odometry/filtered\ntype: nav_msgs/msg/Odometry\n", - "topic: /parameter_events\ntype: rcl_interfaces/msg/ParameterEvent\n", - "topic: /plan\ntype: nav_msgs/msg/Path\n", - "topic: /plan_smoothed\ntype: nav_msgs/msg/Path\n", - "topic: /planner_server/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /pose\ntype: geometry_msgs/msg/PoseWithCovarianceStamped\n", - "topic: /preempt_teleop\ntype: std_msgs/msg/Empty\n", - "topic: /rosout\ntype: rcl_interfaces/msg/Log\n", - "topic: /scan\ntype: sensor_msgs/msg/LaserScan\n", - "topic: /slam_toolbox/feedback\ntype: visualization_msgs/msg/InteractiveMarkerFeedback\n", - "topic: /slam_toolbox/graph_visualization\ntype: visualization_msgs/msg/MarkerArray\n", - "topic: /slam_toolbox/scan_visualization\ntype: sensor_msgs/msg/LaserScan\n", - "topic: /slam_toolbox/update\ntype: visualization_msgs/msg/InteractiveMarkerUpdate\n", - "topic: /smooth_path/_action/feedback\ntype: nav2_msgs/action/SmoothPath_FeedbackMessage\n", - "topic: /smooth_path/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /smoother_server/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /speed_limit\ntype: nav2_msgs/msg/SpeedLimit\n", - "topic: /spin/_action/feedback\ntype: nav2_msgs/action/Spin_FeedbackMessage\n", - "topic: /spin/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /tf_static\ntype: tf2_msgs/msg/TFMessage\n", - "topic: /trajectories\ntype: visualization_msgs/msg/MarkerArray\n", - "topic: /transformed_global_plan\ntype: nav_msgs/msg/Path\n", - "topic: /unsmoothed_plan\ntype: nav_msgs/msg/Path\n", - "topic: /velocity_smoother/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", - "topic: /wait/_action/feedback\ntype: nav2_msgs/action/Wait_FeedbackMessage\n", - "topic: /wait/_action/status\ntype: action_msgs/msg/GoalStatusArray\n", - "topic: /waypoint_follower/transition_event\ntype: lifecycle_msgs/msg/TransitionEvent\n", + - get_ros2_actions_names_and_types, args: {} + - start_ros2_action, args: {'action': '/navigate_to_pose', 'action_type': 'nav2_msgs/action/NavigateToPose', 'goal': {'pose': {'header': {'frame_id': 'map'}, 'pose': {'position': {'x': 2.0, 'y': 2.0, 'z': 0.0}}}}}""" +) + +ROBOT_NAVIGATION_SYSTEM_PROMPT_5_SHOT = ( + ROBOT_NAVIGATION_SYSTEM_PROMPT_2_SHOT + 
+ """
+    - get_ros2_message_interface, args: {'msg_type': 'nav2_msgs/action/Spin'}
+    - start_ros2_action, args: {'action': '/spin', 'action_type': 'nav2_msgs/action/Spin', 'goal': {'target_yaw': 3.14}}
+    - start_ros2_action, args: {'action': '/drive_on_heading', 'action_type': 'nav2_msgs/action/DriveOnHeading', 'goal': {'target': {'x': 1.0, 'y': 0.0, 'z': 0.0}, 'speed': 0.5}}"""
+)
+TOPICS_AND_TYPES = COMMON_TOPICS_AND_TYPES | NAVIGATION_TOPICS_AND_TYPES
+SERVICES_AND_TYPES = COMMON_SERVICES_AND_TYPES | NAVIGATION_SERVICES_AND_TYPES
+
+
+TOPIC_STRINGS = [
+    f"topic: {topic}\ntype: {topic_type}\n"
+    for topic, topic_type in TOPICS_AND_TYPES.items()
 ]
-ACTIONS_AND_TYPES: Dict[str, str] = {
-    "/assisted_teleop": "nav2_msgs/action/AssistedTeleop",
-    "/backup": "nav2_msgs/action/BackUp",
-    "/compute_path_through_poses": "nav2_msgs/action/ComputePathThroughPoses",
-    "/compute_path_to_pose": "nav2_msgs/action/ComputePathToPose",
-    "/drive_on_heading": "nav2_msgs/action/DriveOnHeading",
-    "/follow_path": "nav2_msgs/action/FollowPath",
-    "/follow_waypoints": "nav2_msgs/action/FollowWaypoints",
-    "/navigate_through_poses": "nav2_msgs/action/NavigateThroughPoses",
-    "/navigate_to_pose": "nav2_msgs/action/NavigateToPose",
-    "/smooth_path": "nav2_msgs/action/SmoothPath",
-    "/spin": "nav2_msgs/action/Spin",
-    "/wait": "nav2_msgs/action/Wait",
-}
-
-SERVICES_AND_TYPES: Dict[str, str] = {
-    "/assisted_teleop/_action/cancel_goal": "action_msgs/srv/CancelGoal",
-    "/assisted_teleop/_action/get_result": "nav2_msgs/action/AssistedTeleop_GetResult",
-    "/assisted_teleop/_action/send_goal": "nav2_msgs/action/AssistedTeleop_SendGoal",
-    "/backup/_action/cancel_goal": "action_msgs/srv/CancelGoal",
-    "/backup/_action/get_result": "nav2_msgs/action/BackUp_GetResult",
-    "/backup/_action/send_goal": "nav2_msgs/action/BackUp_SendGoal",
-    "/behavior_server/change_state": "lifecycle_msgs/srv/ChangeState",
-    "/behavior_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters",
-    "/behavior_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates",
-    "/behavior_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions",
-    "/behavior_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes",
-    "/behavior_server/get_parameters": "rcl_interfaces/srv/GetParameters",
-    "/behavior_server/get_state": "lifecycle_msgs/srv/GetState",
-    "/behavior_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions",
-    "/behavior_server/list_parameters": "rcl_interfaces/srv/ListParameters",
-    "/behavior_server/set_parameters": "rcl_interfaces/srv/SetParameters",
-    "/behavior_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically",
-    "/bt_navigator/change_state": "lifecycle_msgs/srv/ChangeState",
-    "/bt_navigator/describe_parameters": "rcl_interfaces/srv/DescribeParameters",
-    "/bt_navigator/get_available_states": "lifecycle_msgs/srv/GetAvailableStates",
-    "/bt_navigator/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions",
-    "/bt_navigator/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes",
-    "/bt_navigator/get_parameters": "rcl_interfaces/srv/GetParameters",
-    "/bt_navigator/get_state": "lifecycle_msgs/srv/GetState",
-    "/bt_navigator/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions",
-    "/bt_navigator/list_parameters": "rcl_interfaces/srv/ListParameters",
-    "/bt_navigator/set_parameters": "rcl_interfaces/srv/SetParameters",
-    "/bt_navigator/set_parameters_atomically": 
"rcl_interfaces/srv/SetParametersAtomically", - "/bt_navigator_navigate_through_poses_rclcpp_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/bt_navigator_navigate_through_poses_rclcpp_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/bt_navigator_navigate_through_poses_rclcpp_node/get_parameters": "rcl_interfaces/srv/GetParameters", - "/bt_navigator_navigate_through_poses_rclcpp_node/list_parameters": "rcl_interfaces/srv/ListParameters", - "/bt_navigator_navigate_through_poses_rclcpp_node/set_parameters": "rcl_interfaces/srv/SetParameters", - "/bt_navigator_navigate_through_poses_rclcpp_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/bt_navigator_navigate_to_pose_rclcpp_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/bt_navigator_navigate_to_pose_rclcpp_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/bt_navigator_navigate_to_pose_rclcpp_node/get_parameters": "rcl_interfaces/srv/GetParameters", - "/bt_navigator_navigate_to_pose_rclcpp_node/list_parameters": "rcl_interfaces/srv/ListParameters", - "/bt_navigator_navigate_to_pose_rclcpp_node/set_parameters": "rcl_interfaces/srv/SetParameters", - "/bt_navigator_navigate_to_pose_rclcpp_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/compute_path_through_poses/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/compute_path_through_poses/_action/get_result": "nav2_msgs/action/ComputePathThroughPoses_GetResult", - "/compute_path_through_poses/_action/send_goal": "nav2_msgs/action/ComputePathThroughPoses_SendGoal", - "/compute_path_to_pose/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/compute_path_to_pose/_action/get_result": "nav2_msgs/action/ComputePathToPose_GetResult", - "/compute_path_to_pose/_action/send_goal": "nav2_msgs/action/ComputePathToPose_SendGoal", - "/controller_server/change_state": "lifecycle_msgs/srv/ChangeState", - "/controller_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/controller_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/controller_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/controller_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/controller_server/get_parameters": "rcl_interfaces/srv/GetParameters", - "/controller_server/get_state": "lifecycle_msgs/srv/GetState", - "/controller_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/controller_server/list_parameters": "rcl_interfaces/srv/ListParameters", - "/controller_server/set_parameters": "rcl_interfaces/srv/SetParameters", - "/controller_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/drive_on_heading/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/drive_on_heading/_action/get_result": "nav2_msgs/action/DriveOnHeading_GetResult", - "/drive_on_heading/_action/send_goal": "nav2_msgs/action/DriveOnHeading_SendGoal", - "/follow_path/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/follow_path/_action/get_result": "nav2_msgs/action/FollowPath_GetResult", - "/follow_path/_action/send_goal": "nav2_msgs/action/FollowPath_SendGoal", - "/follow_waypoints/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/follow_waypoints/_action/get_result": "nav2_msgs/action/FollowWaypoints_GetResult", - "/follow_waypoints/_action/send_goal": "nav2_msgs/action/FollowWaypoints_SendGoal", - 
"/global_costmap/clear_around_global_costmap": "nav2_msgs/srv/ClearCostmapAroundRobot", - "/global_costmap/clear_entirely_global_costmap": "nav2_msgs/srv/ClearEntireCostmap", - "/global_costmap/clear_except_global_costmap": "nav2_msgs/srv/ClearCostmapExceptRegion", - "/global_costmap/get_costmap": "nav2_msgs/srv/GetCostmap", - "/global_costmap/global_costmap/change_state": "lifecycle_msgs/srv/ChangeState", - "/global_costmap/global_costmap/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/global_costmap/global_costmap/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/global_costmap/global_costmap/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/global_costmap/global_costmap/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/global_costmap/global_costmap/get_parameters": "rcl_interfaces/srv/GetParameters", - "/global_costmap/global_costmap/get_state": "lifecycle_msgs/srv/GetState", - "/global_costmap/global_costmap/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/global_costmap/global_costmap/list_parameters": "rcl_interfaces/srv/ListParameters", - "/global_costmap/global_costmap/set_parameters": "rcl_interfaces/srv/SetParameters", - "/global_costmap/global_costmap/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/grounded_sam/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/grounded_sam/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/grounded_sam/get_parameters": "rcl_interfaces/srv/GetParameters", - "/grounded_sam/list_parameters": "rcl_interfaces/srv/ListParameters", - "/grounded_sam/set_parameters": "rcl_interfaces/srv/SetParameters", - "/grounded_sam/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/grounded_sam_segment": "rai_interfaces/srv/RAIGroundedSam", - "/grounding_dino/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/grounding_dino/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/grounding_dino/get_parameters": "rcl_interfaces/srv/GetParameters", - "/grounding_dino/list_parameters": "rcl_interfaces/srv/ListParameters", - "/grounding_dino/set_parameters": "rcl_interfaces/srv/SetParameters", - "/grounding_dino/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/grounding_dino_classify": "rai_interfaces/srv/RAIGroundingDino", - "/is_path_valid": "nav2_msgs/srv/IsPathValid", - "/launch_ros_138640/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/launch_ros_138640/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/launch_ros_138640/get_parameters": "rcl_interfaces/srv/GetParameters", - "/launch_ros_138640/list_parameters": "rcl_interfaces/srv/ListParameters", - "/launch_ros_138640/set_parameters": "rcl_interfaces/srv/SetParameters", - "/launch_ros_138640/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/lifecycle_manager_navigation/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/lifecycle_manager_navigation/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/lifecycle_manager_navigation/get_parameters": "rcl_interfaces/srv/GetParameters", - "/lifecycle_manager_navigation/is_active": "std_srvs/srv/Trigger", - "/lifecycle_manager_navigation/list_parameters": "rcl_interfaces/srv/ListParameters", - "/lifecycle_manager_navigation/manage_nodes": "nav2_msgs/srv/ManageLifecycleNodes", - "/lifecycle_manager_navigation/set_parameters": 
"rcl_interfaces/srv/SetParameters", - "/lifecycle_manager_navigation/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/lifecycle_manager_slam/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/lifecycle_manager_slam/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/lifecycle_manager_slam/get_parameters": "rcl_interfaces/srv/GetParameters", - "/lifecycle_manager_slam/is_active": "std_srvs/srv/Trigger", - "/lifecycle_manager_slam/list_parameters": "rcl_interfaces/srv/ListParameters", - "/lifecycle_manager_slam/manage_nodes": "nav2_msgs/srv/ManageLifecycleNodes", - "/lifecycle_manager_slam/set_parameters": "rcl_interfaces/srv/SetParameters", - "/lifecycle_manager_slam/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/local_costmap/clear_around_local_costmap": "nav2_msgs/srv/ClearCostmapAroundRobot", - "/local_costmap/clear_entirely_local_costmap": "nav2_msgs/srv/ClearEntireCostmap", - "/local_costmap/clear_except_local_costmap": "nav2_msgs/srv/ClearCostmapExceptRegion", - "/local_costmap/get_costmap": "nav2_msgs/srv/GetCostmap", - "/local_costmap/local_costmap/change_state": "lifecycle_msgs/srv/ChangeState", - "/local_costmap/local_costmap/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/local_costmap/local_costmap/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/local_costmap/local_costmap/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/local_costmap/local_costmap/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/local_costmap/local_costmap/get_parameters": "rcl_interfaces/srv/GetParameters", - "/local_costmap/local_costmap/get_state": "lifecycle_msgs/srv/GetState", - "/local_costmap/local_costmap/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/local_costmap/local_costmap/list_parameters": "rcl_interfaces/srv/ListParameters", - "/local_costmap/local_costmap/set_parameters": "rcl_interfaces/srv/SetParameters", - "/local_costmap/local_costmap/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/map_saver/change_state": "lifecycle_msgs/srv/ChangeState", - "/map_saver/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/map_saver/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/map_saver/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/map_saver/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/map_saver/get_parameters": "rcl_interfaces/srv/GetParameters", - "/map_saver/get_state": "lifecycle_msgs/srv/GetState", - "/map_saver/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/map_saver/list_parameters": "rcl_interfaces/srv/ListParameters", - "/map_saver/save_map": "nav2_msgs/srv/SaveMap", - "/map_saver/set_parameters": "rcl_interfaces/srv/SetParameters", - "/map_saver/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/nav2_container/_container/list_nodes": "composition_interfaces/srv/ListNodes", - "/nav2_container/_container/load_node": "composition_interfaces/srv/LoadNode", - "/nav2_container/_container/unload_node": "composition_interfaces/srv/UnloadNode", - "/navigate_through_poses/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/navigate_through_poses/_action/get_result": "nav2_msgs/action/NavigateThroughPoses_GetResult", - "/navigate_through_poses/_action/send_goal": "nav2_msgs/action/NavigateThroughPoses_SendGoal", - 
"/navigate_to_pose/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/navigate_to_pose/_action/get_result": "nav2_msgs/action/NavigateToPose_GetResult", - "/navigate_to_pose/_action/send_goal": "nav2_msgs/action/NavigateToPose_SendGoal", - "/o3de_ros2_node/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/o3de_ros2_node/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/o3de_ros2_node/get_parameters": "rcl_interfaces/srv/GetParameters", - "/o3de_ros2_node/list_parameters": "rcl_interfaces/srv/ListParameters", - "/o3de_ros2_node/set_parameters": "rcl_interfaces/srv/SetParameters", - "/o3de_ros2_node/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/planner_server/change_state": "lifecycle_msgs/srv/ChangeState", - "/planner_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/planner_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/planner_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/planner_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/planner_server/get_parameters": "rcl_interfaces/srv/GetParameters", - "/planner_server/get_state": "lifecycle_msgs/srv/GetState", - "/planner_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/planner_server/list_parameters": "rcl_interfaces/srv/ListParameters", - "/planner_server/set_parameters": "rcl_interfaces/srv/SetParameters", - "/planner_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/rai_ros2_ari_connector_b6ed00ab6356/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/rai_ros2_ari_connector_b6ed00ab6356/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/rai_ros2_ari_connector_b6ed00ab6356/get_parameters": "rcl_interfaces/srv/GetParameters", - "/rai_ros2_ari_connector_b6ed00ab6356/list_parameters": "rcl_interfaces/srv/ListParameters", - "/rai_ros2_ari_connector_b6ed00ab6356/set_parameters": "rcl_interfaces/srv/SetParameters", - "/rai_ros2_ari_connector_b6ed00ab6356/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/slam_toolbox/clear_changes": "slam_toolbox/srv/Clear", - "/slam_toolbox/clear_queue": "slam_toolbox/srv/ClearQueue", - "/slam_toolbox/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/slam_toolbox/deserialize_map": "slam_toolbox/srv/DeserializePoseGraph", - "/slam_toolbox/dynamic_map": "nav_msgs/srv/GetMap", - "/slam_toolbox/get_interactive_markers": "visualization_msgs/srv/GetInteractiveMarkers", - "/slam_toolbox/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/slam_toolbox/get_parameters": "rcl_interfaces/srv/GetParameters", - "/slam_toolbox/list_parameters": "rcl_interfaces/srv/ListParameters", - "/slam_toolbox/manual_loop_closure": "slam_toolbox/srv/LoopClosure", - "/slam_toolbox/pause_new_measurements": "slam_toolbox/srv/Pause", - "/slam_toolbox/save_map": "slam_toolbox/srv/SaveMap", - "/slam_toolbox/serialize_map": "slam_toolbox/srv/SerializePoseGraph", - "/slam_toolbox/set_parameters": "rcl_interfaces/srv/SetParameters", - "/slam_toolbox/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/slam_toolbox/toggle_interactive_mode": "slam_toolbox/srv/ToggleInteractive", - "/smooth_path/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/smooth_path/_action/get_result": "nav2_msgs/action/SmoothPath_GetResult", - "/smooth_path/_action/send_goal": 
"nav2_msgs/action/SmoothPath_SendGoal", - "/smoother_server/change_state": "lifecycle_msgs/srv/ChangeState", - "/smoother_server/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/smoother_server/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/smoother_server/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/smoother_server/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/smoother_server/get_parameters": "rcl_interfaces/srv/GetParameters", - "/smoother_server/get_state": "lifecycle_msgs/srv/GetState", - "/smoother_server/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/smoother_server/list_parameters": "rcl_interfaces/srv/ListParameters", - "/smoother_server/set_parameters": "rcl_interfaces/srv/SetParameters", - "/smoother_server/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/spin/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/spin/_action/get_result": "nav2_msgs/action/Spin_GetResult", - "/spin/_action/send_goal": "nav2_msgs/action/Spin_SendGoal", - "/tf2_frames": "tf2_msgs/srv/FrameGraph", - "/velocity_smoother/change_state": "lifecycle_msgs/srv/ChangeState", - "/velocity_smoother/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/velocity_smoother/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/velocity_smoother/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/velocity_smoother/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/velocity_smoother/get_parameters": "rcl_interfaces/srv/GetParameters", - "/velocity_smoother/get_state": "lifecycle_msgs/srv/GetState", - "/velocity_smoother/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/velocity_smoother/list_parameters": "rcl_interfaces/srv/ListParameters", - "/velocity_smoother/set_parameters": "rcl_interfaces/srv/SetParameters", - "/velocity_smoother/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", - "/wait/_action/cancel_goal": "action_msgs/srv/CancelGoal", - "/wait/_action/get_result": "nav2_msgs/action/Wait_GetResult", - "/wait/_action/send_goal": "nav2_msgs/action/Wait_SendGoal", - "/waypoint_follower/change_state": "lifecycle_msgs/srv/ChangeState", - "/waypoint_follower/describe_parameters": "rcl_interfaces/srv/DescribeParameters", - "/waypoint_follower/get_available_states": "lifecycle_msgs/srv/GetAvailableStates", - "/waypoint_follower/get_available_transitions": "lifecycle_msgs/srv/GetAvailableTransitions", - "/waypoint_follower/get_parameter_types": "rcl_interfaces/srv/GetParameterTypes", - "/waypoint_follower/get_parameters": "rcl_interfaces/srv/GetParameters", - "/waypoint_follower/get_state": "lifecycle_msgs/srv/GetState", - "/waypoint_follower/get_transition_graph": "lifecycle_msgs/srv/GetAvailableTransitions", - "/waypoint_follower/list_parameters": "rcl_interfaces/srv/ListParameters", - "/waypoint_follower/set_parameters": "rcl_interfaces/srv/SetParameters", - "/waypoint_follower/set_parameters_atomically": "rcl_interfaces/srv/SetParametersAtomically", -} -INTERFACES: Dict[str, str] = { - "nav2_msgs/action/NavigateToPose": """#goal definition -geometry_msgs/PoseStamped pose - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -string behavior_tree ---- -#result 
definition -std_msgs/Empty result ---- -#feedback definition -geometry_msgs/PoseStamped current_pose - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -builtin_interfaces/Duration navigation_time - int32 sec - uint32 nanosec -builtin_interfaces/Duration estimated_time_remaining - int32 sec - uint32 nanosec -int16 number_of_recoveries -float32 distance_remaining -""", - "nav2_msgs/action/AssistedTeleop": """#goal definition -builtin_interfaces/Duration time_allowance - int32 sec - uint32 nanosec ---- -#result definition -builtin_interfaces/Duration total_elapsed_time - int32 sec - uint32 nanosec ---- -#feedback -builtin_interfaces/Duration current_teleop_duration - int32 sec - uint32 nanosec""", - "nav2_msgs/action/BackUp": """#goal definition -geometry_msgs/Point target - float64 x - float64 y - float64 z -float32 speed -builtin_interfaces/Duration time_allowance - int32 sec - uint32 nanosec ---- -#result definition -builtin_interfaces/Duration total_elapsed_time - int32 sec - uint32 nanosec ---- -#feedback definition -float32 distance_traveled""", - "nav2_msgs/action/ComputePathThroughPoses": """#goal definition -geometry_msgs/PoseStamped[] goals - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -geometry_msgs/PoseStamped start - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -string planner_id -bool use_start # If false, use current robot pose as path start, if true, use start above instead ---- -#result definition -nav_msgs/Path path - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - geometry_msgs/PoseStamped[] poses - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -builtin_interfaces/Duration planning_time - int32 sec - uint32 nanosec ---- -#feedback definition""", - "nav2_msgs/action/ComputePathToPose": """#goal definition -geometry_msgs/PoseStamped goal - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -geometry_msgs/PoseStamped start - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - Pose pose - Point position - float64 x - float64 y - float64 z - Quaternion orientation - float64 x 0 - float64 y 0 - float64 z 0 - float64 w 1 -string planner_id -bool use_start # If false, use current robot pose as path start, if true, use start above instead ---- -#result definition -nav_msgs/Path path - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - uint32 nanosec - string frame_id - geometry_msgs/PoseStamped[] poses - std_msgs/Header header - builtin_interfaces/Time stamp - int32 sec - 
-        uint32 nanosec
-      string frame_id
-    Pose pose
-      Point position
-        float64 x
-        float64 y
-        float64 z
-      Quaternion orientation
-        float64 x 0
-        float64 y 0
-        float64 z 0
-        float64 w 1
-builtin_interfaces/Duration planning_time
-  int32 sec
-  uint32 nanosec
----
-#feedback definition""",
-    "nav2_msgs/action/DriveOnHeading": """#goal definition
-geometry_msgs/Point target
-  float64 x
-  float64 y
-  float64 z
-float32 speed
-builtin_interfaces/Duration time_allowance
-  int32 sec
-  uint32 nanosec
----
-#result definition
-builtin_interfaces/Duration total_elapsed_time
-  int32 sec
-  uint32 nanosec
----
-#feedback definition
-float32 distance_traveled""",
-    "nav2_msgs/action/FollowPath": """#goal definition
-nav_msgs/Path path
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  geometry_msgs/PoseStamped[] poses
-    std_msgs/Header header
-      builtin_interfaces/Time stamp
-        int32 sec
-        uint32 nanosec
-      string frame_id
-    Pose pose
-      Point position
-        float64 x
-        float64 y
-        float64 z
-      Quaternion orientation
-        float64 x 0
-        float64 y 0
-        float64 z 0
-        float64 w 1
-string controller_id
-string goal_checker_id
----
-#result definition
-std_msgs/Empty result
----
-#feedback definition
-float32 distance_to_goal
-float32 speed""",
-    "nav2_msgs/action/FollowWaypoints": """#goal definition
-geometry_msgs/PoseStamped[] poses
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  Pose pose
-    Point position
-      float64 x
-      float64 y
-      float64 z
-    Quaternion orientation
-      float64 x 0
-      float64 y 0
-      float64 z 0
-      float64 w 1
----
-#result definition
-int32[] missed_waypoints
----
-#feedback definition
-uint32 current_waypoint""",
-    "nav2_msgs/action/NavigateThroughPoses": """#goal definition
-geometry_msgs/PoseStamped[] poses
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  Pose pose
-    Point position
-      float64 x
-      float64 y
-      float64 z
-    Quaternion orientation
-      float64 x 0
-      float64 y 0
-      float64 z 0
-      float64 w 1
-string behavior_tree
----
-#result definition
-std_msgs/Empty result
----
-#feedback definition
-geometry_msgs/PoseStamped current_pose
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  Pose pose
-    Point position
-      float64 x
-      float64 y
-      float64 z
-    Quaternion orientation
-      float64 x 0
-      float64 y 0
-      float64 z 0
-      float64 w 1
-builtin_interfaces/Duration navigation_time
-  int32 sec
-  uint32 nanosec
-builtin_interfaces/Duration estimated_time_remaining
-  int32 sec
-  uint32 nanosec
-int16 number_of_recoveries
-float32 distance_remaining
-int16 number_of_poses_remaining
-""",
-    "nav2_msgs/action/SmoothPath": """#goal definition
-nav_msgs/Path path
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  geometry_msgs/PoseStamped[] poses
-    std_msgs/Header header
-      builtin_interfaces/Time stamp
-        int32 sec
-        uint32 nanosec
-      string frame_id
-    Pose pose
-      Point position
-        float64 x
-        float64 y
-        float64 z
-      Quaternion orientation
-        float64 x 0
-        float64 y 0
-        float64 z 0
-        float64 w 1
-string smoother_id
-builtin_interfaces/Duration max_smoothing_duration
-  int32 sec
-  uint32 nanosec
-bool check_for_collisions
----
-#result definition
-nav_msgs/Path path
-  std_msgs/Header header
-    builtin_interfaces/Time stamp
-      int32 sec
-      uint32 nanosec
-    string frame_id
-  geometry_msgs/PoseStamped[] poses
-    std_msgs/Header header
-      builtin_interfaces/Time stamp
-        int32 sec
-        uint32 nanosec
-      string frame_id
-    Pose pose
-      Point position
-        float64 x
-        float64 y
-        float64 z
-      Quaternion orientation
-        float64 x 0
-        float64 y 0
-        float64 z 0
-        float64 w 1
-builtin_interfaces/Duration smoothing_duration
-  int32 sec
-  uint32 nanosec
-bool was_completed
----
-#feedback definition
-""",
-    "nav2_msgs/action/Wait": """#goal definition
-builtin_interfaces/Duration time
-  int32 sec
-  uint32 nanosec
----
-#result definition
-builtin_interfaces/Duration total_elapsed_time
-  int32 sec
-  uint32 nanosec
----
-#feedback definition
-builtin_interfaces/Duration time_left
-  int32 sec
-  uint32 nanosec""",
-}
-
-ACTION_MODELS: Dict[str, Type[BaseModel]] = {
-    "nav2_msgs/action/NavigateToPose": NavigateToPoseGoal,
-    "nav2_msgs/action/Spin": SpinGoal,
-    "nav2_msgs/action/AssistedTeleop": AssistedTeleopGoal,
-    "nav2_msgs/action/BackUp": BackUpGoal,
-    "nav2_msgs/action/ComputePathThroughPoses": ComputePathThroughPosesGoal,
-    "nav2_msgs/action/ComputePathToPose": ComputePathToPoseGoal,
-    "nav2_msgs/action/DriveOnHeading": DriveOnHeadingGoal,
-    "nav2_msgs/action/FollowPath": FollowPathGoal,
-    "nav2_msgs/action/FollowWaypoints": FollowWaypointsGoal,
-    "nav2_msgs/action/NavigateThroughPoses": NavigateThroughPosesGoal,
-    "nav2_msgs/action/SmoothPath": SmoothPathGoal,
-    "nav2_msgs/action/Wait": WaitGoal,
-}

 ACTION_STRINGS = [
     f"action: {action}\ntype: {act_type}\n"
-    for action, act_type in ACTIONS_AND_TYPES.items()
+    for action, act_type in NAVIGATION_ACTIONS_AND_TYPES.items()
 ]

 SERVICE_STRINGS = [
@@ -869,79 +121,111 @@ class NavigationTask(Task):
-    @property
-    def type(self) -> str:
-        return "navigation"
-
-    def get_system_prompt(self) -> str:
-        return ROBOT_NAVIGATION_SYSTEM_PROMPT
+    type = "navigation"

     @property
     def available_tools(self) -> List[BaseTool]:
         tools = MockActionsToolkit(
             mock_actions_names_and_types=ACTION_STRINGS,
-            available_actions=list(ACTIONS_AND_TYPES.keys()),
-            available_action_types=list(ACTIONS_AND_TYPES.values()),
-            available_action_models=ACTION_MODELS,
+            available_actions=list(NAVIGATION_ACTIONS_AND_TYPES.keys()),
+            available_action_types=list(NAVIGATION_ACTIONS_AND_TYPES.values()),
+            available_action_models=NAVIGATION_ACTION_MODELS,
         ).get_tools()
-        tools.append(MockGetROS2MessageInterfaceTool(mock_interfaces=INTERFACES))
+        tools.append(
+            MockGetROS2TopicsNamesAndTypesTool(
+                mock_topics_names_and_types=TOPIC_STRINGS
+            )
+        )
+        tools.append(
+            MockGetROS2ServicesNamesAndTypesTool(
+                mock_service_names_and_types=SERVICE_STRINGS
+            )
+        )
+        tools.append(
+            MockGetROS2MessageInterfaceTool(mock_interfaces=NAVIGATION_INTERFACES)
+        )
         return tools

+    @property
+    def optional_tool_calls_number(self) -> int:
+        # list topics and get interface
+        return 2
+
+    def get_system_prompt(self) -> str:
+        if self.n_shots == 0:
+            return ROBOT_NAVIGATION_SYSTEM_PROMPT_0_SHOT
+        elif self.n_shots == 2:
+            return ROBOT_NAVIGATION_SYSTEM_PROMPT_2_SHOT
+        else:
+            return ROBOT_NAVIGATION_SYSTEM_PROMPT_5_SHOT
+
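+# Usage sketch (hypothetical values, added for illustration; TaskArgs comes from
+# rai_bench.tool_calling_agent.interfaces and validators are built elsewhere):
+#
+#     args = TaskArgs(extra_tool_calls=0, prompt_detail="brief", examples_in_system_prompt=2)
+#     task = NavigateToPointTask(validators=[...], task_args=args)
+#     task.get_system_prompt()  # expected -> ROBOT_NAVIGATION_SYSTEM_PROMPT_2_SHOT
+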
" + "First get the available actions, then set up the navigation goal to reach point (2.0, 2.0, 0.0)." + ) class SpinAroundTask(NavigationTask): recursion_limit = 50 complexity = "medium" - def get_prompt(self) -> str: + def get_base_prompt(self) -> str: return "Spin around by 3 radians." + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate the robot's current orientation and execute a spinning motion " + "to rotate the robot by 3 radians from its current heading." + ) + class MoveToFrontTask(NavigationTask): recursion_limit = 50 complexity = "medium" - def get_prompt(self) -> str: + def get_base_prompt(self) -> str: return "Move 2 meters to the front." + def get_prompt(self) -> str: + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can determine the robot's current position and orientation, " + "then move it 2 meters forward in the direction it is currently facing." + ) + class MoveToBedTask(NavigationTask): recursion_limit = 50 - complexity = "medium" + complexity = "hard" - @property - def available_tools(self) -> List[BaseTool]: - return [ - MockGetROS2ActionsNamesAndTypesTool( - mock_actions_names_and_types=ACTION_STRINGS - ), - MockStartROS2ActionTool( - available_actions=list(ACTIONS_AND_TYPES.keys()), - available_action_types=list(ACTIONS_AND_TYPES.values()), - available_action_models=ACTION_MODELS, - ), - MockGetROS2ActionFeedbackTool(), - MockGetROS2ActionResultTool(), - MockGetROS2MessageInterfaceTool(mock_interfaces=INTERFACES), - MockGetROS2TopicsNamesAndTypesTool( - mock_topics_names_and_types=TOPICS_NAMES_AND_TYPES - ), - MockGetDistanceToObjectsTool( - available_topics=[ - "/camera/camera/color/image_raw", - "/camera/camera/depth/image_rect_raw", - ], - mock_distance_measurements=[ - DistanceMeasurement(name="bed", distance=5.0) - ], - ), - ] + def get_base_prompt(self) -> str: + return "Move closer to the bed leaving 1 meter space." def get_prompt(self) -> str: - return "Move closer to the to the bed. Leave 1 meter of space between the bed and you." + if self.prompt_detail == "brief": + return self.get_base_prompt() + else: + return ( + f"{self.get_base_prompt()} " + "You can locate the bed in the environment, calculate the appropriate position " + "that maintains 1 meter distance from the bed, and navigate to that position." + ) diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py index 24e328d31..94e81742e 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py @@ -14,18 +14,36 @@ import logging -from abc import abstractmethod +from abc import ABC, abstractmethod from typing import List from langchain_core.tools import BaseTool from pydantic import BaseModel, Field from rai.messages import preprocess_image -from rai_bench.tool_calling_agent.interfaces import Task, Validator +from rai_bench.tool_calling_agent.interfaces import Task, TaskArgs, Validator loggers_type = logging.Logger -SPATIAL_REASONING_SYSTEM_PROMPT = "You are a helpful and knowledgeable AI assistant that specializes in interpreting and analyzing visual content. Your task is to answer questions based on the images provided to you. Please response with the use of the provided tools." 
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py
index 24e328d31..94e81742e 100644
--- a/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py
+++ b/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py
@@ -14,18 +14,36 @@
 import logging
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from typing import List

 from langchain_core.tools import BaseTool
 from pydantic import BaseModel, Field
 from rai.messages import preprocess_image

-from rai_bench.tool_calling_agent.interfaces import Task, Validator
+from rai_bench.tool_calling_agent.interfaces import Task, TaskArgs, Validator

 loggers_type = logging.Logger

-SPATIAL_REASONING_SYSTEM_PROMPT = "You are a helpful and knowledgeable AI assistant that specializes in interpreting and analyzing visual content. Your task is to answer questions based on the images provided to you. Please response with the use of the provided tools."
+SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT = """You are a helpful and knowledgeable AI assistant that specializes in interpreting and analyzing visual content. Your task is to answer questions based on the images provided to you. Please respond using the provided tools."""
+
+SPATIAL_REASONING_SYSTEM_PROMPT_2_SHOT = (
+    SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT
+    + """
+
+Example of tool calls:
+- return_bool_response, args: {'response': True}
+- return_bool_response, args: {'response': False}"""
+)
+
+# NOTE (jmatejcz) In this case we are using only one tool, so there is no difference between 2-shot and 5-shot
+SPATIAL_REASONING_SYSTEM_PROMPT_5_SHOT = (
+    SPATIAL_REASONING_SYSTEM_PROMPT_2_SHOT
+    + """
+- return_bool_response, args: {'response': True} # When object is clearly visible
+- return_bool_response, args: {'response': False} # When object is not present
+- return_bool_response, args: {'response': True} # When spatial relationship is correct"""
+)


 class TaskParametrizationError(Exception):
@@ -34,32 +52,54 @@ class TaskParametrizationError(Exception):
     pass


+class ReturnBoolResponseToolInput(BaseModel):
+    response: bool = Field(..., description="The response to the question.")
+
+
+class ReturnBoolResponseTool(BaseTool):
+    """Tool that returns a boolean response."""
+
+    name: str = "return_bool_response"
+    description: str = "Return a bool response to the question."
+    args_schema = ReturnBoolResponseToolInput
+
+    def _run(self, response: bool) -> bool:
+        if type(response) is bool:
+            return response
+        raise ValueError("Invalid response type. Response must be a boolean.")
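+
+# Usage sketch (hypothetical call; relies on LangChain's standard BaseTool.run API):
+#
+#     ReturnBoolResponseTool().run({"response": True})  # -> True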
+
+
+class BoolImageTaskInput(BaseModel):
+    question: str = Field(..., description="The question to be answered.")
+    images_paths: List[str] = Field(
+        ...,
+        description="List of image file paths to be used for answering the question.",
+    )
+
+
 class SpatialReasoningAgentTask(Task):
     """Abstract class for spatial reasoning tasks for tool calling agent."""

+    type = "spatial_reasoning"
+
     def __init__(
         self,
         validators: List[Validator],
-        extra_tool_calls: int = 0,
+        task_args: TaskArgs,
         logger: loggers_type | None = None,
     ) -> None:
         super().__init__(
             validators=validators,
-            extra_tool_calls=extra_tool_calls,
+            task_args=task_args,
             logger=logger,
         )
         self.expected_tools: List[BaseTool]
         self.question: str
         self.images_paths: List[str]

-    @property
-    def type(self) -> str:
-        return "spatial_reasoning"
-
     @abstractmethod
     def get_images(self) -> List[str]:
         """Get the images related to the task.
-
         Returns
         -------
         List[str]
@@ -68,47 +108,25 @@ def get_images(self) -> List[str]:
         pass

     def get_system_prompt(self) -> str:
-        return SPATIAL_REASONING_SYSTEM_PROMPT
-
-
-class ReturnBoolResponseToolInput(BaseModel):
-    response: bool = Field(..., description="The response to the question.")
+        if self.n_shots == 0:
+            return SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT
+        elif self.n_shots == 2:
+            return SPATIAL_REASONING_SYSTEM_PROMPT_2_SHOT
+        else:
+            return SPATIAL_REASONING_SYSTEM_PROMPT_5_SHOT


-class ReturnBoolResponseTool(BaseTool):
-    """Tool that returns a boolean response."""
-
-    name: str = "return_bool_response"
-    description: str = "Return a bool response to the question."
-    args_schema = ReturnBoolResponseToolInput
-
-    def _run(self, response: bool) -> bool:
-        if type(response) is bool:
-            return response
-        raise ValueError("Invalid response type. Response must be a boolean.")
-
-
-class BoolImageTaskInput(BaseModel):
-    question: str = Field(..., description="The question to be answered.")
-    images_paths: List[str] = Field(
-        ...,
-        description="List of image file paths to be used for answering the question.",
-    )
-
-
-class BoolImageTask(SpatialReasoningAgentTask):
-    complexity = "easy"
-
+class BoolImageTask(SpatialReasoningAgentTask, ABC):
     def __init__(
         self,
         task_input: BoolImageTaskInput,
         validators: List[Validator],
-        extra_tool_calls: int = 0,
+        task_args: TaskArgs,
         logger: loggers_type | None = None,
     ) -> None:
         super().__init__(
             validators=validators,
-            extra_tool_calls=extra_tool_calls,
+            task_args=task_args,
             logger=logger,
         )
         self.question = task_input.question
@@ -118,9 +136,41 @@ def __init__(
     def available_tools(self) -> List[BaseTool]:
         return [ReturnBoolResponseTool()]

-    def get_prompt(self):
+    @property
+    def optional_tool_calls_number(self) -> int:
+        return 0
+
+    def get_base_prompt(self) -> str:
         return self.question

+    def get_prompt(self):
+        if self.prompt_detail == "brief":
+            return self.get_base_prompt()
+        else:
+            return (
+                f"{self.get_base_prompt()} "
+                "You can examine the provided image(s) carefully to identify relevant features, "
+                "analyze the visual content, and provide a boolean response based on your observations."
+            )
+
     def get_images(self):
         images = [preprocess_image(image_path) for image_path in self.images_paths]
         return images
+
+
+# NOTE (jmatejcz) a spatial reasoning task's difficulty is based solely on the prompt and image,
+# so when declaring a task, please decide subjectively how hard the task is.
+# Examples:
+# easy -> locating a single object, telling if it is present
+# medium -> telling what state an object is in (is the door open?) or locating multiple objects
+# hard -> locating multiple objects and reasoning about their relative positions (is X on the right side of Y?)
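+#
+# Instantiation sketch (hypothetical question, image path, and validators):
+#
+#     task = BoolImageTaskEasy(
+#         task_input=BoolImageTaskInput(
+#             question="Is there a chair in the image?",
+#             images_paths=["images/living_room.png"],
+#         ),
+#         validators=[...],
+#         task_args=TaskArgs(),
+#     )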
+
+
+class BoolImageTaskEasy(BoolImageTask):
+    complexity = "easy"
+
+
+class BoolImageTaskMedium(BoolImageTask):
+    complexity = "medium"
+
+
+class BoolImageTaskHard(BoolImageTask):
+    complexity = "hard"
diff --git a/src/rai_bench/rai_bench/utils.py b/src/rai_bench/rai_bench/utils.py
index 0da9aee5f..87629ac43 100644
--- a/src/rai_bench/rai_bench/utils.py
+++ b/src/rai_bench/rai_bench/utils.py
@@ -43,7 +43,23 @@ def parse_tool_calling_benchmark_args():
         nargs="+",
         choices=["easy", "medium", "hard"],
         default=["easy", "medium", "hard"],
-        help="Complexity levels to include in the benchmark",
+        help="Complexity levels of tasks to include in the benchmark",
+    )
+    parser.add_argument(
+        "--prompt-detail",
+        type=str,
+        nargs="+",
+        choices=["brief", "descriptive"],
+        default=["brief", "descriptive"],
+        help="Prompt detail levels to include in the benchmark",
+    )
+    parser.add_argument(
+        "--n-shots",
+        type=int,
+        nargs="+",
+        choices=[0, 2, 5],
+        default=[0, 2, 5],
+        help="Number of examples in the system prompt for few-shot prompting",
     )
     parser.add_argument(
         "--task-types",
diff --git a/src/rai_core/rai/initialization/model_initialization.py b/src/rai_core/rai/initialization/model_initialization.py
index ce0c4ace5..c7f4fa263 100644
--- a/src/rai_core/rai/initialization/model_initialization.py
+++ b/src/rai_core/rai/initialization/model_initialization.py
@@ -190,6 +190,8 @@ def get_llm_model_direct(
     elif vendor == "ollama":
         from langchain_ollama import ChatOllama

+        # Suppress httpx INFO logging for Ollama requests
+        logging.getLogger("httpx").setLevel(logging.WARNING)
         model_config = cast(OllamaConfig, model_config)
         return ChatOllama(model=model_name, base_url=model_config.base_url, **kwargs)
     else: