1
#include <array>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "json.hpp"

using json = nlohmann::ordered_json;
8
+
9
+
10
// Returns a random identifier made of 8 lowercase hexadecimal characters.
//
// Despite the name this is not an RFC 4122 UUID: it is only 32 bits of
// randomness, used in this file for tool-call ids and scratch-file names.
// The generator state is function-local static, so it is seeded once from
// std::random_device on first use and shared by all callers.
std::string generate_uuid() {
    static std::random_device rd;
    static std::mt19937 generator(rd());
    static std::uniform_int_distribution<int> distribution(0, 15);

    // Exactly 16 digits so that every index in [0, 15] maps to a hex digit.
    static constexpr char kHexDigits[] = "0123456789abcdef";
    constexpr std::size_t kIdLength = 8;

    std::string uuid;
    uuid.reserve(kIdLength);
    for (std::size_t i = 0; i < kIdLength; ++i) {
        uuid.push_back(kHexDigits[distribution(generator)]);
    }
    return uuid;
}
23
+
24
+
25
+ std::string jsonrepair (const std::string value) {
26
+ std::array<char , 128 > buffer;
27
+ std::string result;
28
+ // Ensure the command passed to popen() is null-terminated
29
+ std::string tmpfile_name = " ." + generate_uuid () + " .json" ;
30
+ std::ofstream outfile (tmpfile_name);
31
+ outfile << value; // Assuming jsonStr contains your JSON string
32
+ outfile.close ();
33
+ std::string command = " node jsonrepair.ts " + tmpfile_name;
34
+ std::unique_ptr<FILE, decltype (&pclose)> pipe (popen (command.c_str (), " r" ), pclose);
35
+ if (!pipe) {
36
+ throw std::runtime_error (" popen() failed!" );
37
+ }
38
+ while (fgets (buffer.data (), buffer.size (), pipe.get ()) != nullptr ) {
39
+ result += buffer.data ();
40
+ }
41
+ return result;
42
+ }
43
+
44
+
45
+ json parse_if_json (const std::string& value) {
46
+ try {
47
+ // json repair here
48
+ return json::parse (jsonrepair (value));
49
+ } catch (const json::parse_error&) {
50
+ return value; // Return the original string if parsing fails
51
+ }
52
+ }
53
+
54
+
55
// Normalises a string value extracted from a tool call.
//
// Steps, in order:
//   1. Drop every backslash that does not start a JSON escape sequence
//      (\" \\ \/ \b \f \n \r \t or \uXXXX).
//   2. Turn each remaining \" into a plain ".
//   3. If the result both starts and ends with ", strip that outer pair.
//      A lone " counts as both first and last character and becomes "".
//
// Returns the cleaned copy; `command_str` itself is not modified.
std::string clean_command_string(const std::string& command_str) {
    // Compiled once: std::regex construction is far more expensive than a match.
    static const std::regex kStrayBackslash(R"(\\(?!["\\/bfnrt]|u[a-fA-F0-9]{4}))");
    static const std::regex kEscapedQuote(R"(\\")");

    std::string cleaned = std::regex_replace(command_str, kStrayBackslash, "");
    cleaned = std::regex_replace(cleaned, kEscapedQuote, "\"");

    // front()/back() are undefined on an empty string, and the text can be
    // empty here even for non-empty input (e.g. a single stray backslash).
    if (!cleaned.empty() && cleaned.front() == '"' && cleaned.back() == '"') {
        cleaned = cleaned.size() >= 2 ? cleaned.substr(1, cleaned.size() - 2)
                                      : std::string();
    }
    return cleaned;
}
64
+
65
+
66
+ json clean_json_strings (const std::string& input_str) {
67
+ try {
68
+ // json repair here
69
+ std::string fixed_str = jsonrepair (input_str);
70
+ json data = json::parse (fixed_str);
71
+
72
+ for (auto & [key, value] : data.items ()) {
73
+ if (value.is_string ()) {
74
+ std::string val = value.get <std::string>();
75
+ if (val.front () == ' {' || val.front () == ' [' ) {
76
+ data[key] = parse_if_json (val);
77
+ } else {
78
+ data[key] = clean_command_string (val);
79
+ }
80
+ } else if (value.is_object ()) {
81
+ for (auto & [k, v] : value.items ()) {
82
+ if (v.is_string ()) {
83
+ v = clean_command_string (v.get <std::string>());
84
+ }
85
+ }
86
+ }
87
+ }
88
+ return data;
89
+ } catch (const json::parse_error& e) {
90
+ std::cout << " Error decoding JSON: " << e.what () << std::endl;
91
+ return nullptr ;
92
+ }
93
+ }
94
+
95
+
96
+
97
+
98
+ std::vector<json> rubra_fc_json_tool_extractor (const std::string& output_str) {
99
+ std::vector<json> result;
100
+ printf (" OUTPUT STR TO BE PARSED : %s" , output_str.c_str ());
101
+ if (output_str.find (" endtoolcall" ) == std::string::npos) {
102
+ return result;
103
+ }
104
+
105
+ std::vector<std::string> listOfStrToParse;
106
+ size_t start = 0 , end = 0 ;
107
+
108
+ // Iterate until all instances of "endtoolcall" are processed
109
+ while ((end = output_str.find (" endtoolcall" , start)) != std::string::npos) {
110
+ std::string segment = output_str.substr (start, end - start);
111
+ size_t pos = segment.find (" starttoolcall" );
112
+ if (pos != std::string::npos) {
113
+ // Extract substring after "toolcall"
114
+ listOfStrToParse.push_back (segment.substr (pos + std::string (" starttoolcall" ).length ()));
115
+ }
116
+ start = end + std::string (" endtoolcall" ).length (); // Move past the "endtoolcall"
117
+ }
118
+
119
+ std::vector<json> function_call_json;
120
+
121
+ try {
122
+ for (const auto & line : listOfStrToParse) {
123
+ // json fc = json::parse(line);
124
+ json fc = clean_json_strings (line);
125
+ if (fc[" arguments" ].is_string ()) {
126
+ fc[" arguments" ] = json::parse (fc[" arguments" ].get <std::string>());
127
+ }
128
+ if (!fc.is_null ()) {
129
+ function_call_json.push_back (fc);
130
+ }
131
+
132
+ }
133
+ } catch (const std::exception& e) {
134
+ std::cerr << " Error: " << e.what () << std::endl;
135
+ }
136
+
137
+ for (const auto & fc : function_call_json) {
138
+ json func_call;
139
+ func_call[" id" ] = generate_uuid ();
140
+ func_call[" name" ] = fc[" name" ];
141
+ func_call[" kwargs" ] = fc[" arguments" ];
142
+ func_call[" type" ] = " function" ;
143
+ result.push_back (func_call);
144
+ }
145
+
146
+ return result;
147
+ }
0 commit comments