@@ -1169,33 +1169,53 @@ namespace sls {
1169
1169
if (is_value (x))
1170
1170
return false ;
1171
1171
1172
- vector<zstring> conts ;
1172
+ vector<lookahead> lookaheads ;
1173
1173
expr_ref d_r (y, m);
1174
1174
seq_rewriter seqrw (m);
1175
1175
for (unsigned i = 0 ; i < s.length (); ++i) {
1176
1176
verbose_stream () << " Derivative " << s.extract (0 , i) << " : " << d_r << " \n " ;
1177
1177
if (seq.re .is_empty (d_r))
1178
1178
break ;
1179
1179
zstring prefix = s.extract (0 , i);
1180
- choose (d_r, 2 , prefix, conts );
1180
+ choose (d_r, 2 , prefix, lookaheads );
1181
1181
expr_ref ch (seq.str .mk_char (s[i]), m);
1182
1182
d_r = seqrw.mk_derivative (ch, d_r);
1183
1183
}
1184
- if (!seq.re .is_empty (d_r))
1185
- choose (d_r, 2 , s, conts);
1184
+ unsigned current_min_length = UINT_MAX;
1185
+ if (!seq.re .is_empty (d_r)) {
1186
+ choose (d_r, 2 , s, lookaheads);
1187
+ current_min_length = info.min_length ;
1188
+ }
1189
+
1190
+ unsigned global_min_length = UINT_MAX;
1191
+ for (auto & [str, min_length] : lookaheads)
1192
+ global_min_length = std::max (min_length, global_min_length);
1193
+
1194
+ verbose_stream () << " repair in_re " << current_min_length << " "
1195
+ << global_min_length << " " << mk_pp (e, m) << " " << s << " \n " ;
1186
1196
1187
- verbose_stream () << " repair in_re " << mk_pp (e, m) << " " << s << " \n " ;
1188
- for (auto & str : conts)
1189
- verbose_stream () << " prefix " << str << " \n " ;
1190
1197
1191
1198
// TODO: do some length analysis to prune out short candidates when there are longer ones.
1192
1199
// TODO: when matching .*"bcd" with string ab, the extension abc is more interesting than aba.
1193
1200
if (ctx.is_true (e)) {
1194
- for (auto & str : conts)
1195
- m_str_updates.push_back ({ x, str, 1 });
1201
+ for (auto & [str, min_length] : lookaheads) {
1202
+ if (min_length == UINT_MAX && current_min_length < UINT_MAX)
1203
+ continue ;
1204
+ if (global_min_length < min_length)
1205
+ continue ;
1206
+ double score = 0.001 ;
1207
+ if (min_length < UINT_MAX && s.length () < str.length ()) {
1208
+ // reward small lengths
1209
+ // penalize size differences (unless min_length decreases)
1210
+ score = 1 << (current_min_length - min_length);
1211
+ score /= ((double )abs ((int )s.length () - (int )str.length ()) + 1 );
1212
+ }
1213
+ verbose_stream () << " prefix " << score << " " << min_length << " : " << str << " \n " ;
1214
+ m_str_updates.push_back ({ x, str, score });
1215
+ }
1196
1216
}
1197
1217
else {
1198
- for (auto & str : conts )
1218
+ for (auto & [ str, min_length] : lookaheads )
1199
1219
m_str_updates.push_back ({ x, str + zstring (m_chars[ctx.rand (m_chars.size ())]), 1 });
1200
1220
}
1201
1221
return apply_update ();
@@ -1249,9 +1269,9 @@ namespace sls {
1249
1269
}
1250
1270
}
1251
1271
1252
- void seq_plugin::choose (expr* r, unsigned k, zstring& prefix, vector<zstring >& result) {
1272
+ void seq_plugin::choose (expr* r, unsigned k, zstring& prefix, vector<lookahead >& result) {
1253
1273
auto info = seq.re .get_info (r);
1254
- result.push_back (prefix);
1274
+ result.push_back ({ prefix, info. min_length } );
1255
1275
if (k == 0 )
1256
1276
return ;
1257
1277
unsigned_vector chars;
0 commit comments