@@ -74,11 +74,16 @@ Revert bias on long strings:
74
74
- bake in bias for shorter strings into equation solving?
75
75
76
76
Equality solving using stochastic Nielsen transformations.
77
+ - When solving for an equality w = v, first convert w and v into two vectors by removing concatenations.
78
+ The updates are then performed on the arguments to concatenations and not the concatenations themselves.
79
+ This saves some amount of spurious work when pushing assignments down over concatenations, which is
80
+ what the current first version of the solver does.
77
81
- Given equality where current assignment does not satisfy it:
78
82
- Xw = v:
79
83
- let X' range over prefixes of X that match v.
80
84
- non-deterministic set X <- strval0(X')
81
- - non-deterministic set X <- strval0(X') + 'a' where strval0(X') + 'a' matches prefix of strval0(v), and X' is longest prefix of X that matches v.
85
+ - non-deterministic set X <- strval0(X') + 'a' where strval0(X') + 'a'
86
+ matches prefix of strval0(v), and X' is longest prefix of X that matches v.
82
87
- If X fully matches a prefix of v, then, in addition to the rules above:
83
88
- consume constant character from strval0(X)w = v
84
89
- reveal the next variable to solve for.
@@ -90,6 +95,8 @@ Equality solving using stochastic Nelson.
90
95
#include " ast/sls/sls_seq_plugin.h"
91
96
#include " ast/sls/sls_context.h"
92
97
#include " ast/ast_pp.h"
98
+ #include " ast/rewriter/seq_rewriter.h"
99
+ #include " ast/rewriter/th_rewriter.h"
93
100
94
101
95
102
namespace sls {
@@ -258,7 +265,6 @@ namespace sls {
258
265
VERIFY (seq.str .is_contains (e, a, b));
259
266
if (seq.is_string (a->get_sort ()))
260
267
return strval0 (a).contains (strval0 (b));
261
-
262
268
NOT_IMPLEMENTED_YET ();
263
269
break ;
264
270
case OP_SEQ_PREFIX:
@@ -274,6 +280,11 @@ namespace sls {
274
280
NOT_IMPLEMENTED_YET ();
275
281
break ;
276
282
case OP_SEQ_IN_RE:
283
+ VERIFY (seq.str .is_in_re (e, a, b));
284
+ if (seq.is_string (a->get_sort ()))
285
+ return is_in_re (strval0 (a), b);
286
+ NOT_IMPLEMENTED_YET ();
287
+ break ;
277
288
case OP_SEQ_NTH:
278
289
case OP_SEQ_NTH_I:
279
290
case OP_SEQ_NTH_U:
@@ -420,35 +431,24 @@ namespace sls {
420
431
}
421
432
422
433
// Recompute the value of e from its arguments.
//
// Boolean terms and terms reported by is_value are left untouched.
// itos, stoi, length and index terms are delegated to their dedicated
// repair_up_* helpers. Any other string-sorted term takes strval1(e) as its
// new strval0(e) and the context is notified through new_value_eh.
// Everything else is reported on the verbose stream as not yet implemented.
void seq_plugin::repair_up(app* e) {
    // Nothing to do for Booleans or for terms that already are values.
    if (m.is_bool(e) || is_value(e))
        return;

    if (seq.str.is_itos(e)) {
        repair_up_str_itos(e);
        return;
    }
    if (seq.str.is_stoi(e)) {
        repair_up_str_stoi(e);
        return;
    }
    if (seq.str.is_length(e)) {
        repair_up_str_length(e);
        return;
    }
    if (seq.str.is_index(e)) {
        repair_up_str_indexof(e);
        return;
    }
    if (!seq.is_string(e->get_sort())) {
        verbose_stream() << " repair up nyi: " << mk_bounded_pp(e, m) << " \n ";
        return;
    }

    // Generic string term: adopt the freshly evaluated value and publish it.
    strval0(e) = strval1(e);
    ctx.new_value_eh(e);
}
453
453
454
454
bool seq_plugin::repair_down (app* e) {
@@ -461,6 +461,7 @@ namespace sls {
461
461
if (m.is_eq (e))
462
462
return repair_down_eq (e);
463
463
464
+
464
465
NOT_IMPLEMENTED_YET ();
465
466
return false ;
466
467
}
@@ -621,6 +622,10 @@ namespace sls {
621
622
return repair_down_str_itos (e);
622
623
case OP_STRING_STOI:
623
624
return repair_down_str_stoi (e);
625
+ case OP_SEQ_IN_RE:
626
+ if (seq.is_string (to_app (e)->get_arg (0 )->get_sort ()))
627
+ return repair_down_in_re (e);
628
+ break ;
624
629
case OP_STRING_UBVTOS:
625
630
case OP_STRING_SBVTOS:
626
631
case OP_STRING_TO_CODE:
@@ -639,8 +644,6 @@ namespace sls {
639
644
case OP_SEQ_FOLDLI:
640
645
641
646
case OP_SEQ_TO_RE:
642
- case OP_SEQ_IN_RE:
643
-
644
647
case OP_RE_PLUS:
645
648
case OP_RE_STAR:
646
649
case OP_RE_OPTION:
@@ -679,7 +682,6 @@ namespace sls {
679
682
m_int_updates.push_back ({ x, r, 1 });
680
683
else
681
684
m_int_updates.push_back ({ x, rational (-1 - ctx.rand (10 )), 1 });
682
-
683
685
return apply_update ();
684
686
}
685
687
@@ -1137,4 +1139,132 @@ namespace sls {
1137
1139
return get_eval (e).is_value ;
1138
1140
return m.is_value (e);
1139
1141
}
1142
+
1143
+ // Regular expressions
1144
+
1145
+ bool seq_plugin::is_in_re (zstring const & s, expr* r) {
1146
+ expr_ref sval (seq.str .mk_string (s), m);
1147
+ th_rewriter rw (m);
1148
+ expr_ref in_re (seq.re .mk_in_re (sval, r), m);
1149
+ rw (in_re);
1150
+ SASSERT (m.limit ().is_canceled () || m.is_true (in_re) || m.is_false (in_re));
1151
+ return m.is_true (in_re);
1152
+ }
1153
+
1154
+ bool seq_plugin::repair_down_in_re (app* e) {
1155
+ expr* x, * y;
1156
+ VERIFY (seq.str .is_in_re (e, x, y));
1157
+ auto info = seq.re .get_info (y);
1158
+ if (!info.interpreted )
1159
+ return false ;
1160
+ auto s = strval0 (x);
1161
+ expr_ref xval (seq.str .mk_string (s), m);
1162
+ expr_ref in_re (seq.re .mk_in_re (xval, y), m);
1163
+ th_rewriter rw (m);
1164
+ rw (in_re);
1165
+ SASSERT (m.limit ().is_canceled () || m.is_true (in_re) || m.is_false (in_re));
1166
+ if (m.is_true (in_re) == ctx.is_true (e))
1167
+ return true ;
1168
+
1169
+ if (is_value (x))
1170
+ return false ;
1171
+
1172
+ vector<zstring> conts;
1173
+ expr_ref d_r (y, m);
1174
+ seq_rewriter seqrw (m);
1175
+ for (unsigned i = 0 ; i < s.length (); ++i) {
1176
+ verbose_stream () << " Derivative " << s.extract (0 , i) << " : " << d_r << " \n " ;
1177
+ if (seq.re .is_empty (d_r))
1178
+ break ;
1179
+ zstring prefix = s.extract (0 , i);
1180
+ choose (d_r, 2 , prefix, conts);
1181
+ expr_ref ch (seq.str .mk_char (s[i]), m);
1182
+ d_r = seqrw.mk_derivative (ch, d_r);
1183
+ }
1184
+ if (!seq.re .is_empty (d_r))
1185
+ choose (d_r, 2 , s, conts);
1186
+
1187
+ verbose_stream () << " repair in_re " << mk_pp (e, m) << " " << s << " \n " ;
1188
+ for (auto & str : conts)
1189
+ verbose_stream () << " prefix " << str << " \n " ;
1190
+
1191
+ // TODO: do some length analysis to prune out short candidates when there are longer ones.
1192
+ // TODO: when matching .*"bcd" with string ab, the extension abc is more interesting than aba.
1193
+ if (ctx.is_true (e)) {
1194
+ for (auto & str : conts)
1195
+ m_str_updates.push_back ({ x, str, 1 });
1196
+ }
1197
+ else {
1198
+ for (auto & str : conts)
1199
+ m_str_updates.push_back ({ x, str + m_chars[ctx.rand (m_chars.size ())], 1 });
1200
+ }
1201
+ return apply_update ();
1202
+ }
1203
+
1204
+ void seq_plugin::next_char (expr* r, unsigned_vector& chars) {
1205
+ SASSERT (seq.is_re (r));
1206
+ expr* x, * y;
1207
+ zstring s;
1208
+ if (seq.re .is_concat (r, x, y)) {
1209
+ auto info = seq.re .get_info (x);
1210
+ next_char (x, chars);
1211
+ if (info.nullable == l_true)
1212
+ next_char (y, chars);
1213
+ }
1214
+ else if (seq.re .is_to_re (r, x)) {
1215
+ if (seq.str .is_string (x, s) && !s.empty ())
1216
+ chars.push_back (s[0 ]);
1217
+ }
1218
+ else if (seq.re .is_union (r, x, y)) {
1219
+ next_char (x, chars);
1220
+ next_char (y, chars);
1221
+ }
1222
+ else if (seq.re .is_range (r, x, y)) {
1223
+ zstring s1, s2;
1224
+ seq.str .is_string (x, s1);
1225
+ seq.str .is_string (y, s2);
1226
+ if (s1.length () == 1 && s2.length () == 1 && s1[0 ] <= s2[0 ]) {
1227
+ chars.push_back (s1[0 ] + ctx.rand (s2[0 ] - s1[0 ] + 1 ));
1228
+ chars.push_back (s1[0 ]);
1229
+ chars.push_back (s2[0 ]);
1230
+ }
1231
+ }
1232
+ else if (seq.re .is_star (r, x) || seq.re .is_plus (r, x)) {
1233
+ next_char (x, chars);
1234
+ }
1235
+ else if (seq.re .is_empty (r)) {
1236
+ ;
1237
+ }
1238
+ else if (seq.re .is_full_seq (r)) {
1239
+ if (!m_chars.empty ())
1240
+ chars.push_back (m_chars[ctx.rand (m_chars.size ())]);
1241
+ }
1242
+ else if (seq.re .is_full_char (r)) {
1243
+ if (!m_chars.empty ())
1244
+ chars.push_back (m_chars[ctx.rand (m_chars.size ())]);
1245
+ }
1246
+ else {
1247
+ verbose_stream () << " regex nyi " << mk_bounded_pp (r, m) << " \n " ;
1248
+ NOT_IMPLEMENTED_YET ();
1249
+ }
1250
+ }
1251
+
1252
+ void seq_plugin::choose (expr* r, unsigned k, zstring& prefix, vector<zstring>& result) {
1253
+ auto info = seq.re .get_info (r);
1254
+ result.push_back (prefix);
1255
+ if (k == 0 )
1256
+ return ;
1257
+ unsigned_vector chars;
1258
+ next_char (r, chars);
1259
+ std::stable_sort (chars.begin (), chars.end ());
1260
+ auto it = std::unique (chars.begin (), chars.end ());
1261
+ chars.shrink ((unsigned )(it - chars.begin ()));
1262
+ for (auto ch : chars) {
1263
+ expr_ref c (seq.str .mk_char (ch), m);
1264
+ seq_rewriter rw (m);
1265
+ expr_ref r2 = rw.mk_derivative (c, r);
1266
+ zstring prefix2 = prefix + zstring (ch);
1267
+ choose (r2, k - 1 , prefix2, result);
1268
+ }
1269
+ }
1140
1270
}
0 commit comments