Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RESTful with pickle argument example #753

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions rdagent/app/data_science/agent_dist/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Installation


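```bash
# Install the extra dependency used to serialize objects sent between the processes
pip install jsonpickle

# Start the agent server (Flask app)
python rdagent/app/data_science/agent_dist/agents.py

# In another terminal, run the controller, which calls the agent server over HTTP
python rdagent/app/data_science/agent_dist/ctrl_agent.py --competition playground-series-s4e8
```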
143 changes: 143 additions & 0 deletions rdagent/app/data_science/agent_dist/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@

import jsonpickle
from conf import DIST_SETTING
from flask import Flask, jsonify, request
from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.log import rdagent_logger as logger
from rdagent.scenarios.data_science.proposal.exp_gen import DSExpGen

from rdagent.scenarios.data_science.dev.feedback import DSExperiment2Feedback
from rdagent.scenarios.data_science.dev.runner import DSCoSTEERRunner
from rdagent.components.coder.data_science.ensemble import EnsembleCoSTEER
from rdagent.components.coder.data_science.feature import FeatureCoSTEER
from rdagent.components.coder.data_science.model import ModelCoSTEER
from rdagent.components.coder.data_science.pipeline import PipelineCoSTEER
from rdagent.components.coder.data_science.raw_data_loader import DataLoaderCoSTEER
from rdagent.components.coder.data_science.workflow import WorkflowCoSTEER
from rdagent.components.coder.data_science.ensemble.exp import EnsembleTask
from rdagent.components.coder.data_science.feature.exp import FeatureTask
from rdagent.components.coder.data_science.model.exp import ModelTask
from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
from rdagent.components.coder.data_science.raw_data_loader.exp import DataLoaderTask
from rdagent.components.coder.data_science.workflow.exp import WorkflowTask
from rdagent.core.proposal import ExperimentFeedback

app = Flask(__name__)

@app.route("/exp-gen", methods=["POST"])
def exp_gen():
    """Research step: generate the next experiment.

    Expects a JSON body whose ``scen`` and ``trace`` entries are
    jsonpickle-encoded strings.

    Returns:
        A ``(response, status)`` pair: ``{"experiment": <jsonpickle string>}``
        with 200 on success, or ``{"error": <jsonpickle-encoded exception>}``
        with 500 if anything inside the ``try`` raises.
    """
    data = request.get_json()
    try:
        # SECURITY: jsonpickle.decode can instantiate arbitrary classes and is
        # fed request data here (CodeQL: "Deserialization of user-controlled
        # data", critical). Only expose this service to trusted callers.
        # NOTE(review): swapping in json.loads would hand plain dicts/lists to
        # DSExpGen instead of the decoded objects -- agree on a safe wire
        # format before changing this.
        scen = jsonpickle.decode(data["scen"])
        trace = jsonpickle.decode(data["trace"])

        exp = DSExpGen(scen).gen(trace)

        # unpicklable=True keeps the type information needed to decode the
        # experiment back into an object on the other side.
        exp_pickle = jsonpickle.encode(exp, unpicklable=True)
        return jsonify({"experiment": exp_pickle}), 200
    except Exception as e:
        # Service boundary: report the failure as a jsonpickle-encoded exception.
        return jsonify({"error": jsonpickle.encode(e)}), 500



@app.route("/coding", methods=["POST"])
def coding():
    """Coding step: develop every pending task group of an experiment.

    Expects a JSON body whose ``scen`` and ``exp`` entries are
    jsonpickle-encoded strings.

    Returns:
        A ``(response, status)`` pair: ``{"experiment": <jsonpickle string>}``
        with 200 on success, or ``{"error": <jsonpickle-encoded exception>}``
        with 500 if anything inside the ``try`` raises (including the
        ``NotImplementedError`` for an unsupported task type).
    """
    data = request.get_json()
    try:
        # SECURITY: jsonpickle.decode can instantiate arbitrary classes and is
        # fed request data here (CodeQL: "Deserialization of user-controlled
        # data", critical). Only expose this service to trusted callers.
        # json.loads is not a drop-in replacement: the loop below reads
        # attributes such as exp.pending_tasks_list from the decoded object.
        scen = jsonpickle.decode(data["scen"])
        exp = jsonpickle.decode(data["exp"])

        # One coder per task type, all constructed up front. Order matters:
        # the first isinstance match wins.
        coders = (
            (DataLoaderTask, DataLoaderCoSTEER(scen)),
            (FeatureTask, FeatureCoSTEER(scen)),
            (ModelTask, ModelCoSTEER(scen)),
            (EnsembleTask, EnsembleCoSTEER(scen)),
            (WorkflowTask, WorkflowCoSTEER(scen)),
            (PipelineTask, PipelineCoSTEER(scen)),
        )

        # Process tasks: the type of the first task in each group selects the coder.
        for tasks in exp.pending_tasks_list:
            exp.sub_tasks = tasks
            first_task = exp.sub_tasks[0]
            with logger.tag(f"{first_task.__class__.__name__}"):
                for task_type, coder in coders:
                    if isinstance(first_task, task_type):
                        exp = coder.develop(exp)
                        break
                else:
                    raise NotImplementedError(f"Unsupported component in DataScienceRDLoop: {exp.hypothesis.component}")
            # NOTE(review): nesting reconstructed -- the scraped source lost its
            # indentation; confirm this reset belongs inside the loop.
            exp.sub_tasks = []

        # unpicklable=True keeps the type information needed to decode the
        # experiment back into an object on the other side.
        exp_pickle = jsonpickle.encode(exp, unpicklable=True)
        return jsonify({"experiment": exp_pickle}), 200
    except Exception as e:
        # NOTE(review): this is the only handler that prints the error;
        # consider routing it through the project logger instead.
        print(e)
        return jsonify({"error": jsonpickle.encode(e)}), 500


@app.route("/run", methods=["POST"])
def run():
    """Run the experiment.

    Expects a JSON body whose ``scen`` and ``exp`` entries are
    jsonpickle-encoded strings.

    Returns:
        A ``(response, status)`` pair: ``{"experiment": <jsonpickle string>}``
        with 200 on success, or ``{"error": <jsonpickle-encoded exception>}``
        with 500 if anything inside the ``try`` raises.
    """
    data = request.get_json()
    try:
        # SECURITY: jsonpickle.decode can instantiate arbitrary classes and is
        # fed request data here (CodeQL: "Deserialization of user-controlled
        # data", critical). Only expose this service to trusted callers.
        # NOTE(review): json.loads would pass plain dicts to the runner rather
        # than the decoded objects -- agree on a safe wire format first.
        scen = jsonpickle.decode(data["scen"])
        exp = jsonpickle.decode(data["exp"])

        # Develop the experiment using the runner.
        runner = DSCoSTEERRunner(scen)
        new_exp = runner.develop(exp)

        # unpicklable=True keeps the type information needed to decode the
        # experiment back into an object on the other side.
        exp_pickle = jsonpickle.encode(new_exp, unpicklable=True)
        return jsonify({"experiment": exp_pickle}), 200
    except Exception as e:
        # Service boundary: report the failure as a jsonpickle-encoded exception.
        return jsonify({"error": jsonpickle.encode(e)}), 500


@app.route("/feedback", methods=["POST"])
def feedback():
    """Generate feedback for the experiment.

    Expects a JSON body whose ``scen``, ``exp`` and ``trace`` entries are
    jsonpickle-encoded strings.

    Returns:
        A ``(response, status)`` pair: ``{"feedback": <jsonpickle string>}``
        with 200 on success, or ``{"error": <jsonpickle-encoded exception>}``
        with 500 if anything inside the ``try`` raises.
    """
    data = request.get_json()
    try:
        # SECURITY: jsonpickle.decode can instantiate arbitrary classes and is
        # fed request data here (CodeQL: "Deserialization of user-controlled
        # data", critical). Only expose this service to trusted callers.
        # json.loads is not a drop-in replacement: the code below calls
        # trace.next_incomplete_component() on the decoded object.
        scen = jsonpickle.decode(data["scen"])
        exp = jsonpickle.decode(data["exp"])
        trace = jsonpickle.decode(data["trace"])

        # Initialize the summarizer
        summarizer = DSExperiment2Feedback(scen)

        # Named exp_feedback so the local does not shadow this view function.
        if trace.next_incomplete_component() is None or DS_RD_SETTING.coder_on_whole_pipeline:
            # We have already completed the components in the previous trace, so the
            # current loop is focusing on a newly proposed idea and needs
            # feedback for that proposal.
            exp_feedback = summarizer.generate_feedback(exp, trace)
        else:
            # Otherwise we are still in the drafting stage and do not need
            # complicated feedback.
            exp_feedback = ExperimentFeedback(
                reason=f"{exp.hypothesis.component} is completed.",
                decision=True,
            )

        # unpicklable=True keeps the type information needed to decode the
        # feedback back into an object on the other side.
        feedback_pickle = jsonpickle.encode(exp_feedback, unpicklable=True)
        return jsonify({"feedback": feedback_pickle}), 200
    except Exception as e:
        # Service boundary: report the failure as a jsonpickle-encoded exception.
        return jsonify({"error": jsonpickle.encode(e)}), 500


if __name__ == "__main__":
    # Script entry point: serve the Flask app on the host/port taken from
    # DIST_SETTING, with debug mode off.
    app.run(host=DIST_SETTING.host, port=DIST_SETTING.port, debug=False)
15 changes: 15 additions & 0 deletions rdagent/app/data_science/agent_dist/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pydantic_settings import BaseSettings, SettingsConfigDict


class DistSettings(BaseSettings):
    """Network settings for the distributed agents.

    Fields are read by pydantic-settings with the ``DIST_`` environment prefix
    configured below.
    """

    # Host name used to build the agent server address.
    host: str = "localhost"
    # Port used to build the agent server address.
    port: int = 5321

    # TODO(review): decide whether `extra="allow"` is needed to accept extra settings.
    model_config = SettingsConfigDict(env_prefix="DIST_")


# Module-level settings instance imported by the other agent_dist modules.
DIST_SETTING = DistSettings()
148 changes: 148 additions & 0 deletions rdagent/app/data_science/agent_dist/ctrl_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
from pathlib import Path
from typing import Any

import fire
import jsonpickle
import requests
from rdagent.app.data_science.agent_dist.conf import DIST_SETTING
from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.workflow.conf import BasePropSetting
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.exception import CoderError, RunnerError
from rdagent.core.proposal import ExperimentFeedback
from rdagent.core.scenario import Scenario
from rdagent.core.utils import import_class
from rdagent.log import rdagent_logger as logger
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
from rdagent.scenarios.data_science.proposal.exp_gen import DSTrace
from rdagent.scenarios.kaggle.kaggle_crawler import download_data


class DataScienceRDLoop(RDLoop):
    """R&D loop whose steps are executed by a remote agent server.

    Every step serializes its inputs with jsonpickle, POSTs them to the server
    at ``DIST_SETTING.host:DIST_SETTING.port`` and decodes the jsonpickled
    reply. Only the bookkeeping in :meth:`record` runs locally.
    """

    # NOTE(review): presumably the base loop treats these exception types as
    # "skip the rest of this iteration" rather than fatal - confirm in the base class.
    skip_loop_error = (CoderError, RunnerError)

    def __init__(self, PROP_SETTING: BasePropSetting):
        """Build the scenario and an empty trace from ``PROP_SETTING``.

        Parameters
        ----------
        PROP_SETTING :
            Settings object; ``competition`` and ``scen`` (import path of the
            scenario class) are read from it.
        """
        logger.log_object(PROP_SETTING.competition, tag="competition")
        self.scen: Scenario = import_class(PROP_SETTING.scen)(PROP_SETTING.competition)
        self.trace = DSTrace(scen=self.scen)

        # ``super(RDLoop, self)`` starts the MRO lookup *after* RDLoop, so
        # RDLoop.__init__ itself is not executed.
        # NOTE(review): presumably intentional because the components RDLoop
        # would build locally live on the server here - confirm.
        super(RDLoop, self).__init__()

    def _call_api(self, get_key: str, uri: str, **kwargs: Any) -> Any:
        """POST jsonpickled ``kwargs`` to endpoint ``uri`` and return the decoded result.

        Parameters
        ----------
        get_key :
            Key of the JSON reply that holds the jsonpickled result.
        uri :
            Endpoint path appended to ``http://<host>:<port>/``.
        **kwargs :
            Objects to send; each value is jsonpickle-encoded under its keyword name.

        Returns
        -------
        The object decoded from ``reply[get_key]`` when the server answers 200.

        Raises
        ------
        RuntimeError
            If the reply is not JSON, carries no ``error`` field on failure, or
            the ``error`` field does not decode to an exception.
        BaseException
            The exception decoded from the server's ``error`` field on a
            non-200 status.
        """
        url = f"http://{DIST_SETTING.host}:{DIST_SETTING.port}/{uri}"
        payload = {k: jsonpickle.encode(v, unpicklable=True) for k, v in kwargs.items()}
        response = requests.post(url, json=payload)

        # A non-JSON body (e.g. an HTML error page) would otherwise surface as
        # an opaque decode error with no hint about which endpoint failed.
        try:
            body = response.json()
        except ValueError as err:
            raise RuntimeError(
                f"Endpoint '{uri}' returned a non-JSON response "
                f"(HTTP {response.status_code}): {response.text[:500]}"
            ) from err

        # SECURITY: jsonpickle.decode can execute arbitrary code while rebuilding
        # objects. Only point DIST_SETTING at an agent server you trust.
        if response.status_code == 200:
            return jsonpickle.decode(body[get_key])

        error_payload = body.get("error") if isinstance(body, dict) else None
        if error_payload is None:
            raise RuntimeError(
                f"Endpoint '{uri}' failed with HTTP {response.status_code} and no error payload: {body!r}"
            )

        logger.error(f"Request to '{uri}' failed: {error_payload}")
        remote_error = jsonpickle.decode(error_payload)
        if isinstance(remote_error, BaseException):
            raise remote_error
        # The payload may decode to a plain dict or string, which cannot be raised.
        raise RuntimeError(f"Endpoint '{uri}' failed with a non-exception error payload: {remote_error!r}")

    def direct_exp_gen(self, prev_out: dict[str, Any]):
        """Request a new experiment from the ``exp-gen`` endpoint using the scenario and trace."""
        return self._call_api("experiment", "exp-gen", scen=self.scen, trace=self.trace)

    def coding(self, prev_out: dict[str, Any]):
        """Send the generated experiment to the ``coding`` endpoint and return the reply."""
        exp = self._call_api("experiment", "coding", exp=prev_out["direct_exp_gen"], scen=self.scen)
        logger.log_object(exp)
        return exp

    def running(self, prev_out: dict[str, Any]):
        """Send the coded experiment to the ``run`` endpoint when it is ready to run.

        Returns the experiment unchanged when ``is_ready_to_run()`` is false.
        """
        exp: DSExperiment = prev_out["coding"]
        if not exp.is_ready_to_run():
            return exp
        new_exp = self._call_api("experiment", "run", exp=exp, scen=self.scen)
        logger.log_object(new_exp)
        return new_exp

    def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
        """Request feedback for the run experiment from the ``feedback`` endpoint."""
        exp: DSExperiment = prev_out["running"]
        feedback = self._call_api("feedback", "feedback", exp=exp, scen=self.scen, trace=self.trace)
        logger.log_object(feedback)
        return feedback

    def record(self, prev_out: dict[str, Any]):
        """Append this loop's outcome to the trace and restart the trace after repeated errors.

        NOTE(review): the nesting below was reconstructed from a source whose
        indentation was lost; confirm that the restart check belongs inside the
        error branch.
        """
        e = prev_out.get(self.EXCEPTION_KEY, None)
        if e is None:
            self.trace.hist.append((prev_out["running"], prev_out["feedback"]))
        else:
            # Record the last experiment produced before the failing step.
            failed_exp = prev_out["direct_exp_gen"] if isinstance(e, CoderError) else prev_out["coding"]
            self.trace.hist.append((failed_exp, ExperimentFeedback.from_exception(e)))
            if (
                self.trace.sota_experiment() is None
                and len(self.trace.hist) >= DS_RD_SETTING.consecutive_errors
                and not DS_RD_SETTING.coder_on_whole_pipeline
            ):
                # if {in initial/drafting stage} and {tried enough times}
                recent = self.trace.hist[-DS_RD_SETTING.consecutive_errors :]
                # Any truthy feedback among the recent entries prevents a restart.
                if not any(fb for _, fb in recent):
                    logger.error("Consecutive errors reached the limit. Dumping trace.")
                    logger.log_object(self.trace, tag="trace before restart")
                    # Start a fresh trace, keeping the scenario and knowledge base.
                    self.trace = DSTrace(scen=self.trace.scen, knowledge_base=self.trace.knowledge_base)
        logger.log_object(self.trace, tag="trace")
        logger.log_object(self.trace.sota_experiment(), tag="SOTA experiment")


def main(
    path=None, output_path=None, step_n=None, loop_n=None, competition="bms-molecular-translation", do_truncate=True
):
    """
    Run the distributed data-science R&D loop, either fresh or resumed from a saved session.

    Parameters
    ----------
    path :
        path like `$LOG_PATH/__session__/1/0_propose`. It indicates that we restore the state after finishing step 0 in loop 1
    output_path :
        path like `$LOG_PATH`. It indicates where we want to save our session and log information.
    step_n :
        How many steps to run; if None, it will run forever until error or KeyboardInterrupt
    loop_n :
        How many loops to run; if None, it will run forever until error or KeyboardInterrupt
        - if current loop is incomplete, it will be counted as the first loop for completion.
        - if both step_n and loop_n are provided, the process will stop as soon as either condition is met.
    competition :
        Competition name. When it is not None it overrides `DS_RD_SETTING.competition`.
    do_truncate :
        If set to True, the logger will truncate the future log messages by calling `logger.storage.truncate`.


    You can start a run or continue a session with
    .. code-block:: bash
        python rdagent/app/data_science/agent_dist/ctrl_agent.py --competition playground-series-s4e8
        python rdagent/app/data_science/agent_dist/ctrl_agent.py $LOG_PATH/__session__/1/0_propose --step_n 1  # `step_n` is an optional parameter
    """
    if competition is not None:
        DS_RD_SETTING.competition = competition

    if DS_RD_SETTING.competition:
        if DS_RD_SETTING.scen.endswith("KaggleScen"):
            download_data(competition=DS_RD_SETTING.competition, settings=DS_RD_SETTING)
        else:
            # Use the effective competition name: the `competition` argument is
            # None when the name comes from DS_RD_SETTING only.
            data_dir = Path(DS_RD_SETTING.local_data_path) / DS_RD_SETTING.competition
            if not data_dir.exists():
                logger.error(f"Please prepare data for competition {DS_RD_SETTING.competition} first.")
                return
    else:
        logger.error("Please specify competition name.")
        if path is None:
            # A new loop cannot be built without a competition. A resumed
            # session still proceeds, as it did before.
            return

    if path is None:
        kaggle_loop = DataScienceRDLoop(DS_RD_SETTING)
    else:
        kaggle_loop = DataScienceRDLoop.load(path, output_path, do_truncate)
    kaggle_loop.run(step_n=step_n, loop_n=loop_n)


if __name__ == "__main__":
    # Expose `main` as a command-line interface through python-fire.
    fire.Fire(main)
Loading