|
| 1 | +/*++ |
| 2 | + Copyright (c) 2020 Microsoft Corporation |
| 3 | +
|
| 4 | + Module Name: |
| 5 | +
|
| 6 | + smt_induction.cpp |
| 7 | +
|
| 8 | + Abstract: |
| 9 | + |
| 10 | + Add induction lemmas to context. |
| 11 | +
|
| 12 | + Author: |
| 13 | +
|
| 14 | + Nikolaj Bjorner 2020-04-25 |
| 15 | +
|
| 16 | + Notes: |
| 17 | +
|
| 18 | + - work in absence of recursive functions but instead presence of quantifiers |
| 19 | + - relax current requirement of model sweeping when terms don't have value simplifications |
| 20 | + - k-induction |
| 21 | + - also to deal with mutually recursive datatypes |
| 22 | + - beyond literal induction lemmas |
| 23 | + - refine initialization of values when term is equal to constructor application, |
| 24 | + |
| 25 | +--*/ |
| 26 | + |
| 27 | +#include "ast/ast_pp.h" |
| 28 | +#include "ast/ast_util.h" |
| 29 | +#include "ast/recfun_decl_plugin.h" |
| 30 | +#include "ast/datatype_decl_plugin.h" |
| 31 | +#include "ast/arith_decl_plugin.h" |
| 32 | +#include "ast/rewriter/value_sweep.h" |
| 33 | +#include "ast/rewriter/expr_safe_replace.h" |
| 34 | +#include "smt/smt_context.h" |
| 35 | +#include "smt/smt_induction.h" |
| 36 | + |
| 37 | +using namespace smt; |
| 38 | + |
| 39 | +/** |
| 40 | + * collect literals that are assigned to true, |
| 41 | + * but evaluate to false under all extensions of |
| 42 | + * the congruence closure. |
| 43 | + */ |
| 44 | + |
| 45 | +literal_vector collect_induction_literals::pre_select() { |
| 46 | + literal_vector result; |
| 47 | + for (unsigned i = m_literal_index; i < ctx.assigned_literals().size(); ++i) { |
| 48 | + literal lit = ctx.assigned_literals()[i]; |
| 49 | + smt::bool_var v = lit.var(); |
| 50 | + if (!ctx.has_enode(v)) { |
| 51 | + continue; |
| 52 | + } |
| 53 | + expr* e = ctx.bool_var2expr(v); |
| 54 | + if (!lit.sign() && m.is_eq(e)) |
| 55 | + continue; |
| 56 | + result.push_back(lit); |
| 57 | + } |
| 58 | + ctx.push_trail(value_trail<context, unsigned>(m_literal_index)); |
| 59 | + m_literal_index = ctx.assigned_literals().size(); |
| 60 | + return result; |
| 61 | +} |
| 62 | + |
| 63 | +void collect_induction_literals::model_sweep_filter(literal_vector& candidates) { |
| 64 | + expr_ref_vector terms(m); |
| 65 | + for (literal lit : candidates) { |
| 66 | + terms.push_back(ctx.bool_var2expr(lit.var())); |
| 67 | + } |
| 68 | + vector<expr_ref_vector> values; |
| 69 | + vs(terms, values); |
| 70 | + unsigned j = 0; |
| 71 | + IF_VERBOSE(1, |
| 72 | + verbose_stream() << "terms: " << terms << "\n"; |
| 73 | + for (auto const& vec : values) { |
| 74 | + verbose_stream() << "assignment: " << vec << "\n"; |
| 75 | + }); |
| 76 | + for (unsigned i = 0; i < terms.size(); ++i) { |
| 77 | + literal lit = candidates[i]; |
| 78 | + bool is_viable_candidate = true; |
| 79 | + for (auto const& vec : values) { |
| 80 | + if (vec[i] && lit.sign() && m.is_true(vec[i])) |
| 81 | + continue; |
| 82 | + if (vec[i] && !lit.sign() && m.is_false(vec[i])) |
| 83 | + continue; |
| 84 | + is_viable_candidate = false; |
| 85 | + break; |
| 86 | + } |
| 87 | + if (is_viable_candidate) |
| 88 | + candidates[j++] = lit; |
| 89 | + } |
| 90 | + candidates.shrink(j); |
| 91 | +} |
| 92 | + |
| 93 | + |
| 94 | +collect_induction_literals::collect_induction_literals(context& ctx, ast_manager& m, value_sweep& vs): |
| 95 | + ctx(ctx), |
| 96 | + m(m), |
| 97 | + vs(vs), |
| 98 | + m_literal_index(0) |
| 99 | +{} |
| 100 | + |
| 101 | +literal_vector collect_induction_literals::operator()() { |
| 102 | + literal_vector candidates = pre_select(); |
| 103 | + model_sweep_filter(candidates); |
| 104 | + return candidates; |
| 105 | +} |
| 106 | + |
| 107 | + |
| 108 | +// -------------------------------------- |
| 109 | +// create_induction_lemmas |
| 110 | + |
| 111 | +bool create_induction_lemmas::is_induction_candidate(enode* n) { |
| 112 | + expr* e = n->get_owner(); |
| 113 | + if (m.is_value(e)) |
| 114 | + return false; |
| 115 | + // TBD: filter if n is equivalent to a value. |
| 116 | + sort* s = m.get_sort(e); |
| 117 | + if (m_dt.is_datatype(s) && m_dt.is_recursive(s)) |
| 118 | + return true; |
| 119 | + |
| 120 | + // potentially also induction on integers, sequences |
| 121 | + // m_arith.is_int(s) |
| 122 | + // return true; |
| 123 | + return false; |
| 124 | +} |
| 125 | + |
| 126 | +/** |
| 127 | + * positions in n that can be used for induction |
| 128 | + * the positions are distinct roots |
| 129 | + * and none of the roots are equivalent to a value in the current |
| 130 | + * congruence closure. |
| 131 | + */ |
| 132 | +enode_vector create_induction_lemmas::induction_positions(enode* n) { |
| 133 | + enode_vector result; |
| 134 | + enode_vector todo; |
| 135 | + auto add_todo = [&](enode* n) { |
| 136 | + if (!n->is_marked()) { |
| 137 | + n->set_mark(); |
| 138 | + todo.push_back(n); |
| 139 | + } |
| 140 | + }; |
| 141 | + add_todo(n); |
| 142 | + for (unsigned i = 0; i < todo.size(); ++i) { |
| 143 | + n = todo[i]; |
| 144 | + for (enode* a : smt::enode::args(n)) |
| 145 | + add_todo(a); |
| 146 | + if (is_induction_candidate(n)) |
| 147 | + result.push_back(n); |
| 148 | + } |
| 149 | + for (enode* n : todo) |
| 150 | + n->unset_mark(); |
| 151 | + return result; |
| 152 | +} |
| 153 | + |
| 154 | + |
| 155 | +/** |
| 156 | + * abstraction candidates for replacing different occurrence of t in n by x |
| 157 | + * it returns all possible non-empty subsets of t replaced by x. |
| 158 | + * |
| 159 | + * TBD: add term sharing throttle. |
| 160 | + * TDD: add depth throttle. |
| 161 | + */ |
| 162 | +void create_induction_lemmas::abstract(enode* n, enode* t, expr* x, abstractions& result) { |
| 163 | + if (n->get_root() == t->get_root()) { |
| 164 | + result.push_back(abstraction(m, x, n->get_owner(), t->get_owner())); |
| 165 | + } |
| 166 | + abstraction_args r1, r2; |
| 167 | + r1.push_back(abstraction_arg(m)); |
| 168 | + for (enode* arg : enode::args(n)) { |
| 169 | + unsigned n = result.size(); |
| 170 | + abstract(arg, t, x, result); |
| 171 | + for (unsigned i = n; i < result.size(); ++i) { |
| 172 | + abstraction& a = result[i]; |
| 173 | + for (auto const& v : r1) { |
| 174 | + r2.push_back(v); |
| 175 | + r2.back().push_back(a); |
| 176 | + } |
| 177 | + } |
| 178 | + r1.swap(r2); |
| 179 | + r2.reset(); |
| 180 | + result.shrink(n); |
| 181 | + } |
| 182 | + for (auto const& a : r1) { |
| 183 | + result.push_back(abstraction(m, m.mk_app(n->get_decl(), a.m_terms), a.m_eqs)); |
| 184 | + } |
| 185 | +}; |
| 186 | + |
| 187 | +/** |
| 188 | + * filter generalizations based on value_generator |
| 189 | + * If all evaluations are counter-examples, include |
| 190 | + * candidate generalization. |
| 191 | + */ |
| 192 | +void create_induction_lemmas::filter_abstractions(bool sign, abstractions& abs) { |
| 193 | + vector<expr_ref_vector> values; |
| 194 | + expr_ref_vector fmls(m); |
| 195 | + for (auto & a : abs) fmls.push_back(a.m_term); |
| 196 | + vs(fmls, values); |
| 197 | + unsigned j = 0; |
| 198 | + for (unsigned i = 0; i < fmls.size(); ++i) { |
| 199 | + bool all_cex = true; |
| 200 | + for (auto const& vec : values) { |
| 201 | + if (vec[i] && (m.is_true(vec[i]) == sign)) |
| 202 | + continue; |
| 203 | + all_cex = false; |
| 204 | + break; |
| 205 | + } |
| 206 | + if (all_cex) { |
| 207 | + abs[j++] = abs.get(i); |
| 208 | + } |
| 209 | + } |
| 210 | + abs.shrink(j); |
| 211 | +} |
| 212 | + |
| 213 | +/* |
| 214 | + * Create simple induction lemmas of the form: |
| 215 | + * |
| 216 | + * lit & a.eqs() & is-c(t) => is-c(sk); |
| 217 | + * lit & a.eqs() => alpha |
| 218 | + * lit & a.eqs() & is-c(t) => ~beta |
| 219 | + * |
| 220 | + * where alpha = a.term() |
| 221 | + * beta = alpha[sk/access_k(sk)] |
| 222 | + * for each constructor c, that is recursive |
| 223 | + * and contains argument of datatype sort s |
| 224 | + * |
| 225 | + * TBD: consider k-inductive lemmas. |
| 226 | + */ |
| 227 | +void create_induction_lemmas::create_lemmas(expr* t, expr* sk, abstraction& a, literal lit) { |
| 228 | + std::cout << "abstraction: " << a.m_term << "\n"; |
| 229 | + sort* s = m.get_sort(sk); |
| 230 | + if (!m_dt.is_datatype(s)) |
| 231 | + return; |
| 232 | + family_id fid = s->get_family_id(); |
| 233 | + expr_ref alpha = a.m_term; |
| 234 | + auto const& eqs = a.m_eqs; |
| 235 | + literal_vector common_literals; |
| 236 | + for (func_decl* c : *m_dt.get_datatype_constructors(s)) { |
| 237 | + func_decl* is_c = m_dt.get_constructor_recognizer(c); |
| 238 | + bool has_1recursive = false; |
| 239 | + for (func_decl* acc : *m_dt.get_constructor_accessors(c)) { |
| 240 | + if (acc->get_range() != s) |
| 241 | + continue; |
| 242 | + if (common_literals.empty()) { |
| 243 | + common_literals.push_back(~lit); |
| 244 | + for (auto const& p : eqs) { |
| 245 | + common_literals.push_back(~mk_literal(m.mk_eq(p.first, p.second))); |
| 246 | + } |
| 247 | + } |
| 248 | + has_1recursive = true; |
| 249 | + literal_vector lits(common_literals); |
| 250 | + lits.push_back(~mk_literal(m.mk_app(is_c, t))); |
| 251 | + expr_ref beta(alpha); |
| 252 | + expr_safe_replace rep(m); |
| 253 | + rep.insert(sk, m.mk_app(acc, sk)); |
| 254 | + rep(beta); |
| 255 | + literal b_lit = mk_literal(beta); |
| 256 | + if (lit.sign()) b_lit.neg(); |
| 257 | + lits.push_back(~b_lit); |
| 258 | + add_th_lemma(lits); |
| 259 | + } |
| 260 | + if (has_1recursive) { |
| 261 | + literal_vector lits(common_literals); |
| 262 | + lits.push_back(~mk_literal(m.mk_app(is_c, t))); |
| 263 | + lits.push_back(mk_literal(m.mk_app(is_c, sk))); |
| 264 | + add_th_lemma(lits); |
| 265 | + } |
| 266 | + } |
| 267 | + if (!common_literals.empty()) { |
| 268 | + literal_vector lits(common_literals); |
| 269 | + literal a_lit = mk_literal(alpha); |
| 270 | + if (lit.sign()) a_lit.neg(); |
| 271 | + lits.push_back(a_lit); |
| 272 | + add_th_lemma(lits); |
| 273 | + } |
| 274 | +} |
| 275 | + |
| 276 | +void create_induction_lemmas::add_th_lemma(literal_vector const& lits) { |
| 277 | + IF_VERBOSE(1, ctx.display_literals_verbose(verbose_stream() << "lemma:\n", lits) << "\n"); |
| 278 | + ctx.mk_clause(lits.size(), lits.c_ptr(), nullptr, smt::CLS_TH_LEMMA); |
| 279 | + ++m_num_lemmas; |
| 280 | +} |
| 281 | + |
| 282 | +literal create_induction_lemmas::mk_literal(expr* e) { |
| 283 | + if (!ctx.e_internalized(e)) { |
| 284 | + ctx.internalize(e, false); |
| 285 | + } |
| 286 | + enode* n = ctx.get_enode(e); |
| 287 | + ctx.mark_as_relevant(n); |
| 288 | + return ctx.get_literal(e); |
| 289 | +} |
| 290 | + |
| 291 | +func_decl* create_induction_lemmas::mk_skolem(sort* s) { |
| 292 | + func_decl* f = nullptr; |
| 293 | + if (!m_sort2skolem.find(s, f)) { |
| 294 | + sort* domain[2] = { s, m.mk_bool_sort() }; |
| 295 | + f = m.mk_fresh_func_decl("sk", 2, domain, s); |
| 296 | + m_pinned.push_back(f); |
| 297 | + m_pinned.push_back(s); |
| 298 | + m_sort2skolem.insert(s, f); |
| 299 | + } |
| 300 | + return f; |
| 301 | +} |
| 302 | + |
| 303 | + |
| 304 | +bool create_induction_lemmas::operator()(literal lit) { |
| 305 | + unsigned num = m_num_lemmas; |
| 306 | + enode* r = ctx.bool_var2enode(lit.var()); |
| 307 | + for (enode* n : induction_positions(r)) { |
| 308 | + expr* t = n->get_owner(); |
| 309 | + sort* s = m.get_sort(t); |
| 310 | + expr_ref sk(m.mk_app(mk_skolem(s), t, r->get_owner()), m); |
| 311 | + std::cout << "abstract " << mk_pp(t, m) << " " << sk << "\n"; |
| 312 | + abstractions abs; |
| 313 | + abstract(r, n, sk, abs); |
| 314 | + abs.pop_back(); // last position has no generalizations |
| 315 | + filter_abstractions(lit.sign(), abs); |
| 316 | + for (abstraction& a : abs) { |
| 317 | + create_lemmas(t, sk, a, lit); |
| 318 | + } |
| 319 | + } |
| 320 | + return m_num_lemmas > num; |
| 321 | +} |
| 322 | + |
| 323 | +create_induction_lemmas::create_induction_lemmas(context& ctx, ast_manager& m, value_sweep& vs): |
| 324 | + ctx(ctx), |
| 325 | + m(m), |
| 326 | + vs(vs), |
| 327 | + m_dt(m), |
| 328 | + m_pinned(m), |
| 329 | + m_num_lemmas(0) |
| 330 | +{} |
| 331 | + |
| 332 | +induction::induction(context& ctx, ast_manager& m): |
| 333 | + ctx(ctx), |
| 334 | + m(m), |
| 335 | + vs(m), |
| 336 | + m_collect_literals(ctx, m, vs), |
| 337 | + m_create_lemmas(ctx, m, vs) |
| 338 | +{} |
| 339 | + |
| 340 | +// TBD: use smt_arith_value to also include state from arithmetic solver |
| 341 | +void induction::init_values() { |
| 342 | + for (enode* n : ctx.enodes()) |
| 343 | + if (m.is_value(n->get_owner())) |
| 344 | + for (enode* r : *n) |
| 345 | + vs.set_value(r->get_owner(), n->get_owner()); |
| 346 | +} |
| 347 | + |
| 348 | +bool induction::operator()() { |
| 349 | + bool added_lemma = false; |
| 350 | + vs.reset_values(); |
| 351 | + literal_vector candidates = m_collect_literals(); |
| 352 | + for (literal lit : candidates) { |
| 353 | + if (m_create_lemmas(lit)) |
| 354 | + added_lemma = true; |
| 355 | + } |
| 356 | + return added_lemma; |
| 357 | +} |
| 358 | + |
| 359 | +// state contains datatypes + recursive functions |
| 360 | +// more comprehensive: |
| 361 | +// state contains integers / datatypes / sequences + recursive function / quantifiers |
| 362 | + |
| 363 | +bool induction::should_try(context& ctx) { |
| 364 | + recfun::util u(ctx.get_manager()); |
| 365 | + datatype::util dt(ctx.get_manager()); |
| 366 | + theory* adt = ctx.get_theory(dt.get_family_id()); |
| 367 | + return adt && adt->get_num_vars() > 0 && !u.get_rec_funs().empty(); |
| 368 | +} |
0 commit comments