Skip to content

Commit cb9d218

Browse files
committed
bench: update gpt-4o-2024-11-20
1 parent 68e177a commit cb9d218

File tree

265 files changed

+24937
-4011
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

265 files changed

+24937
-4011
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# ObjectConstraint-embedded-json-desc
2+
## Summary
3+
- Aggregation
4+
- Trial: #10
5+
- Success: #10
6+
- 1st: #8
7+
- 2nd: #2
8+
- 3rd: #0
9+
- Failure: #0
10+
- Nothing: #0
11+
- Error: 0
12+
- Average Time: 2111 ms
13+
- Token Usage:
14+
- Everything
15+
- Input
16+
- Total
17+
- Cached
18+
- Output
19+
- Total
20+
- Reasoning
21+
- Accepted Prediction
22+
- Rejected Prediction
23+
24+
## Scenario
25+
### Conversation (user)
26+
Make a virtual person with random values,
27+
and make it to participate in a campaign.
28+
29+
## Trials
30+
No | Status | Time
31+
---:|:-------|------:
32+
[1. success](./trials/1.success.json) | 1️⃣ | 1,330 ms
33+
[2. success](./trials/2.success.json) | 1️⃣ | 1,384 ms
34+
[3. success](./trials/3.success.json) | 2️⃣ | 2,744 ms
35+
[4. success](./trials/4.success.json) | 2️⃣ | 2,594 ms
36+
[5. success](./trials/5.success.json) | 1️⃣ | 4,550 ms
37+
[6. success](./trials/6.success.json) | 1️⃣ | 1,325 ms
38+
[7. success](./trials/7.success.json) | 1️⃣ | 2,894 ms
39+
[8. success](./trials/8.success.json) | 1️⃣ | 1,351 ms
40+
[9. success](./trials/9.success.json) | 1️⃣ | 1,619 ms
41+
[10. success](./trials/10.success.json) | 1️⃣ | 1,319 ms
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"type": "success",
3+
"id": "call_CubgEmFQKKhRxik5zg0bLoFV",
4+
"arguments": {
5+
"id": "b82decd0-f54a-4fbf-bab2-cb45e2a9e4df",
6+
"name": "John Doe",
7+
"thumbnail": "https://example.com/thumbnail.jpg",
8+
"age": 25,
9+
"email": "[email protected]",
10+
"hobbies": [
11+
"reading",
12+
"photography",
13+
"hiking"
14+
],
15+
"isActive": true,
16+
"created_at": "2023-10-24T09:00:00Z"
17+
},
18+
"result": {
19+
"success": true,
20+
"data": {
21+
"id": "b82decd0-f54a-4fbf-bab2-cb45e2a9e4df",
22+
"name": "John Doe",
23+
"thumbnail": "https://example.com/thumbnail.jpg",
24+
"age": 25,
25+
"email": "[email protected]",
26+
"hobbies": [
27+
"reading",
28+
"photography",
29+
"hiking"
30+
],
31+
"isActive": true,
32+
"created_at": "2023-10-24T09:00:00Z"
33+
}
34+
},
35+
"completion": {
36+
"id": "gen-1749644971-xpG1U4gP6lludwsx6h08",
37+
"provider": "OpenAI",
38+
"model": "openai/gpt-4o-2024-11-20",
39+
"object": "chat.completion",
40+
"created": 1749644971,
41+
"choices": [
42+
{
43+
"logprobs": null,
44+
"finish_reason": "tool_calls",
45+
"native_finish_reason": "tool_calls",
46+
"index": 0,
47+
"message": {
48+
"role": "assistant",
49+
"content": "",
50+
"refusal": null,
51+
"reasoning": null,
52+
"tool_calls": [
53+
{
54+
"index": 0,
55+
"id": "call_CubgEmFQKKhRxik5zg0bLoFV",
56+
"type": "function",
57+
"function": {
58+
"name": "participate",
59+
"arguments": "{\"id\":\"b82decd0-f54a-4fbf-bab2-cb45e2a9e4df\",\"name\":\"John Doe\",\"thumbnail\":\"https://example.com/thumbnail.jpg\",\"age\":25,\"email\":\"[email protected]\",\"hobbies\":[\"reading\",\"photography\",\"hiking\"],\"isActive\":true,\"created_at\":\"2023-10-24T09:00:00Z\"}"
60+
}
61+
}
62+
]
63+
}
64+
}
65+
],
66+
"system_fingerprint": "fp_c67768f680",
67+
"usage": {
68+
"prompt_tokens": 174,
69+
"completion_tokens": 101,
70+
"total_tokens": 275,
71+
"prompt_tokens_details": {
72+
"cached_tokens": 0
73+
},
74+
"completion_tokens_details": {
75+
"reasoning_tokens": 0
76+
}
77+
}
78+
},
79+
"started_at": "2025-06-11T12:29:31.500Z",
80+
"completed_at": "2025-06-11T12:29:32.830Z",
81+
"previous": []
82+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"type": "success",
3+
"id": "call_GLI9btGymxSqcs3MIKW8ARYj",
4+
"arguments": {
5+
"id": "84a1c759-5983-4981-87d3-e0d82a15d6e4",
6+
"name": "Alex Johnson",
7+
"thumbnail": "https://example.com/image1.jpg",
8+
"age": 30,
9+
"email": "[email protected]",
10+
"hobbies": [
11+
"reading",
12+
"cycling",
13+
"swimming"
14+
],
15+
"isActive": true,
16+
"created_at": "2023-10-02T13:10:00Z"
17+
},
18+
"result": {
19+
"success": true,
20+
"data": {
21+
"id": "84a1c759-5983-4981-87d3-e0d82a15d6e4",
22+
"name": "Alex Johnson",
23+
"thumbnail": "https://example.com/image1.jpg",
24+
"age": 30,
25+
"email": "[email protected]",
26+
"hobbies": [
27+
"reading",
28+
"cycling",
29+
"swimming"
30+
],
31+
"isActive": true,
32+
"created_at": "2023-10-02T13:10:00Z"
33+
}
34+
},
35+
"completion": {
36+
"id": "gen-1749644971-RTj4hwLtvANU6w5i5bng",
37+
"provider": "OpenAI",
38+
"model": "openai/gpt-4o-2024-11-20",
39+
"object": "chat.completion",
40+
"created": 1749644971,
41+
"choices": [
42+
{
43+
"logprobs": null,
44+
"finish_reason": "tool_calls",
45+
"native_finish_reason": "tool_calls",
46+
"index": 0,
47+
"message": {
48+
"role": "assistant",
49+
"content": "",
50+
"refusal": null,
51+
"reasoning": null,
52+
"tool_calls": [
53+
{
54+
"index": 0,
55+
"id": "call_GLI9btGymxSqcs3MIKW8ARYj",
56+
"type": "function",
57+
"function": {
58+
"name": "participate",
59+
"arguments": "{\"id\":\"84a1c759-5983-4981-87d3-e0d82a15d6e4\",\"name\":\"Alex Johnson\",\"thumbnail\":\"https://example.com/image1.jpg\",\"age\":30,\"email\":\"[email protected]\",\"hobbies\":[\"reading\",\"cycling\",\"swimming\"],\"isActive\":true,\"created_at\":\"2023-10-02T13:10:00Z\"}"
60+
}
61+
}
62+
]
63+
}
64+
}
65+
],
66+
"system_fingerprint": "fp_c67768f680",
67+
"usage": {
68+
"prompt_tokens": 174,
69+
"completion_tokens": 100,
70+
"total_tokens": 274,
71+
"prompt_tokens_details": {
72+
"cached_tokens": 0
73+
},
74+
"completion_tokens_details": {
75+
"reasoning_tokens": 0
76+
}
77+
}
78+
},
79+
"started_at": "2025-06-11T12:29:31.501Z",
80+
"completed_at": "2025-06-11T12:29:32.820Z",
81+
"previous": []
82+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"type": "success",
3+
"id": "call_Tcp0LMwJl1NPyiBtCeErvIHi",
4+
"arguments": {
5+
"id": "8a80af84-9a3c-4896-9e5d-bc0e46f5f8a5",
6+
"name": "John Doe",
7+
"thumbnail": "https://example.com/image.jpeg",
8+
"age": 27,
9+
"email": "[email protected]",
10+
"hobbies": [
11+
"reading",
12+
"cycling",
13+
"hiking"
14+
],
15+
"isActive": true,
16+
"created_at": "2023-10-10T12:00:00Z"
17+
},
18+
"result": {
19+
"success": true,
20+
"data": {
21+
"id": "8a80af84-9a3c-4896-9e5d-bc0e46f5f8a5",
22+
"name": "John Doe",
23+
"thumbnail": "https://example.com/image.jpeg",
24+
"age": 27,
25+
"email": "[email protected]",
26+
"hobbies": [
27+
"reading",
28+
"cycling",
29+
"hiking"
30+
],
31+
"isActive": true,
32+
"created_at": "2023-10-10T12:00:00Z"
33+
}
34+
},
35+
"completion": {
36+
"id": "gen-1749644971-drduljX6BBdK1dWn8tTh",
37+
"provider": "OpenAI",
38+
"model": "openai/gpt-4o-2024-11-20",
39+
"object": "chat.completion",
40+
"created": 1749644971,
41+
"choices": [
42+
{
43+
"logprobs": null,
44+
"finish_reason": "tool_calls",
45+
"native_finish_reason": "tool_calls",
46+
"index": 0,
47+
"message": {
48+
"role": "assistant",
49+
"content": "",
50+
"refusal": null,
51+
"reasoning": null,
52+
"tool_calls": [
53+
{
54+
"index": 0,
55+
"id": "call_Tcp0LMwJl1NPyiBtCeErvIHi",
56+
"type": "function",
57+
"function": {
58+
"name": "participate",
59+
"arguments": "{\"id\":\"8a80af84-9a3c-4896-9e5d-bc0e46f5f8a5\",\"name\":\"John Doe\",\"thumbnail\":\"https://example.com/image.jpeg\",\"age\":27,\"email\":\"[email protected]\",\"hobbies\":[\"reading\",\"cycling\",\"hiking\"],\"isActive\":true,\"created_at\":\"2023-10-10T12:00:00Z\"}"
60+
}
61+
}
62+
]
63+
}
64+
}
65+
],
66+
"system_fingerprint": "fp_c67768f680",
67+
"usage": {
68+
"prompt_tokens": 174,
69+
"completion_tokens": 103,
70+
"total_tokens": 277,
71+
"prompt_tokens_details": {
72+
"cached_tokens": 0
73+
},
74+
"completion_tokens_details": {
75+
"reasoning_tokens": 0
76+
}
77+
}
78+
},
79+
"started_at": "2025-06-11T12:29:31.501Z",
80+
"completed_at": "2025-06-11T12:29:32.885Z",
81+
"previous": []
82+
}

0 commit comments

Comments
 (0)