Branch data Line data Source code
1 : : /******************************************************************************
2 : : * Top contributors (to current version):
3 : : * Andrew Reynolds, Aina Niemetz, Fabian Wolff
4 : : *
5 : : * This file is part of the cvc5 project.
6 : : *
7 : : * Copyright (c) 2009-2025 by the authors listed in the file AUTHORS
8 : : * in the top-level source directory and their institutional affiliations.
9 : : * All rights reserved. See the file COPYING in the top-level source
10 : : * directory for licensing information.
11 : : * ****************************************************************************
12 : : *
13 : : * sygus_sampler
14 : : */
15 : :
16 : : #include "cvc5_private.h"
17 : :
18 : : #ifndef CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
19 : : #define CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
20 : :
21 : : #include <map>
22 : :
23 : : #include "smt/env_obj.h"
24 : : #include "theory/quantifiers/lazy_trie.h"
25 : : #include "theory/quantifiers/term_enumeration.h"
26 : :
27 : : namespace cvc5::internal {
28 : :
29 : : class Env;
30 : :
31 : : namespace theory {
32 : : namespace quantifiers {
33 : :
34 : : class TermDbSygus;
35 : :
36 : : /** SygusSampler
37 : : *
38 : : * This class can be used to test whether two expressions are equivalent
39 : : * by random sampling. We use this class for the following options:
40 : : *
41 : : * sygus-rr-synth: synthesize candidate rewrite rules by finding two terms
42 : : * t1 and t2 do not rewrite to the same term, but are identical when considering
43 : : * a set of sample points, and
44 : : *
45 : : * sygus-rr-verify: detect unsound rewrite rules by finding two terms t1 and
46 : : * t2 that rewrite to the same term, but not identical when considering a set
47 : : * of sample points.
48 : : *
49 : : * To use this class:
50 : : * (1) Call initialize( tds, f, nsamples) where f is sygus datatype term.
51 : : * (2) For terms n1....nm enumerated that correspond to builtin analog of sygus
52 : : * term f, we call registerTerm( n1 )....registerTerm( nm ). It may be the case
53 : : * that registerTerm( ni ) returns nj for some j < i. In this case, we have that
54 : : * ni and nj are equivalent under the sample points in this class.
55 : : *
56 : : * For example, say the grammar for f is:
57 : : * A = 0 | 1 | x | y | A+A | ite( B, A, A )
58 : : * B = A <= A
59 : : * If we call initialize( tds, f, 5 ), this class will generate 5 random sample
60 : : * points for (x,y), say (0,0), (1,1), (0,1), (1,0), (2,2). The return values
61 : : * of successive calls to registerTerm are listed below.
62 : : * registerTerm( 0 ) -> 0
63 : : * registerTerm( x ) -> x
64 : : * registerTerm( x+y ) -> x+y
65 : : * registerTerm( y+x ) -> x+y
66 : : * registerTerm( x+ite(x <= 1+1, 0, y ) ) -> x
67 : : * Notice that the number of sample points can be configured for the above
68 : : * options using sygus-samples=N.
69 : : */
70 : : class SygusSampler : protected EnvObj, public LazyTrieEvaluator
71 : : {
72 : : public:
73 : : SygusSampler(Env& env);
74 : 21847 : ~SygusSampler() override {}
75 : :
76 : : /** initialize
77 : : *
78 : : * tn : the return type of terms we will be testing with this class,
79 : : * vars : the variables we are testing substitutions for,
80 : : * nsamples : number of sample points this class will test,
81 : : * unique_type_ids : if this is set to true, then we consider each variable
82 : : * in vars to have a unique "type id". A type id is a finer-grained notion of
83 : : * type that is used to determine when a rewrite rule is redundant.
84 : : */
85 : : virtual void initialize(TypeNode tn,
86 : : const std::vector<Node>& vars,
87 : : unsigned nsamples,
88 : : bool unique_type_ids = false);
89 : : /** initialize sygus
90 : : *
91 : : * qe : pointer to quantifiers engine,
92 : : * f : a term of some SyGuS datatype type whose values we will be
93 : : * testing under the free variables in the grammar of f,
94 : : * nsamples : number of sample points this class will test,
95 : : * useSygusType : whether we will register terms with this sampler that have
96 : : * the same type as f. If this flag is false, then we will be registering
97 : : * terms of the analog of the type of f, that is, the builtin type that
98 : : * f's type encodes in the deep embedding.
99 : : */
100 : : virtual void initializeSygus(TypeNode ftn, unsigned nsamples);
101 : : /** register term n with this sampler database
102 : : *
103 : : * forceKeep is whether we wish to force that n is chosen as a representative
104 : : * value in the trie.
105 : : */
106 : : virtual Node registerTerm(Node n, bool forceKeep = false);
107 : : /** get number of sample points */
108 : 137 : unsigned getNumSamplePoints() const { return d_samples.size(); }
109 : : /** get variables, adds d_vars to vars */
110 : : void getVariables(std::vector<Node>& vars) const;
111 : : /** get sample point
112 : : *
113 : : * Appends sample point #index to the vector pt, d_vars to vars.
114 : : */
115 : : const std::vector<Node>& getSamplePoint(size_t index) const;
116 : : /** Add pt to the set of sample points considered by this sampler */
117 : : void addSamplePoint(const std::vector<Node>& pt);
118 : : /** evaluate n on sample point index */
119 : : Node evaluate(Node n, unsigned index) override;
120 : : /**
121 : : * Compute the variables from the domain of d_var_index that occur in n,
122 : : * store these in the vector fvs.
123 : : */
124 : : void computeFreeVariables(Node n, std::vector<Node>& fvs);
125 : : /**
126 : : * Returns the index of a sample point such that the evaluation of a and b
127 : : * diverge, or -1 if no such sample point exists.
128 : : */
129 : : int getDiffSamplePointIndex(Node a, Node b);
130 : :
131 : : //--------------------------queries about terms
132 : : /** is contiguous
133 : : *
134 : : * This returns whether n's free variables (terms occurring in the range of
135 : : * d_type_vars) are a prefix of the list of variables in d_type_vars for each
136 : : * type id. For instance, if d_type_vars[id] = { x, y } for some id, then
137 : : * 0, x, x+y, y+x are contiguous but y is not. This is useful for excluding
138 : : * terms from consideration that are alpha-equivalent to others.
139 : : */
140 : : bool isContiguous(Node n);
141 : : /** is ordered
142 : : *
143 : : * This returns whether n's free variables are in order with respect to
144 : : * variables in d_type_vars for each type. For instance, if
145 : : * d_type_vars[id] = { x, y } for some id, then 0, x, x+y are ordered but
146 : : * y and y+x are not.
147 : : */
148 : : bool isOrdered(Node n);
149 : : /** is linear
150 : : *
151 : : * This returns whether n contains at most one occurrence of each free
152 : : * variable. For example, x, x+y are linear, but x+x, (x-y)+y, (x+0)+(x+0) are
153 : : * non-linear.
154 : : */
155 : : bool isLinear(Node n);
156 : : /** check variables
157 : : *
158 : : * This returns false if !isOrdered(n) and checkOrder is true or !isLinear(n)
159 : : * if checkLinear is true, or false otherwise.
160 : : */
161 : : bool checkVariables(Node n, bool checkOrder, bool checkLinear);
162 : : /** contains free variables
163 : : *
164 : : * Returns true if the free variables of b are a subset of those in a, where
165 : : * we require a strict subset if strict is true. Free variables are those that
166 : : * occur in the range d_type_vars.
167 : : */
168 : : bool containsFreeVariables(Node a, Node b, bool strict = false);
169 : : //--------------------------end queries about terms
170 : :
171 : : protected:
172 : : /** sygus term database of d_qe */
173 : : TermDbSygus* d_tds;
174 : : /** term enumerator object (used for random sampling) */
175 : : TermEnumeration d_tenum;
176 : : /** samples */
177 : : std::vector<std::vector<Node> > d_samples;
178 : : /** data structure to check duplication of sample points */
179 : : class PtTrie
180 : : {
181 : : public:
182 : : /** add pt to this trie, returns true if pt is not a duplicate. */
183 : : bool add(std::vector<Node>& pt);
184 : :
185 : : private:
186 : : /** the children of this node */
187 : : std::map<Node, PtTrie> d_children;
188 : : };
189 : : /** a trie for samples */
190 : : PtTrie d_samples_trie;
191 : : /** the sygus type for this sampler (if applicable). */
192 : : TypeNode d_ftn;
193 : : /** whether we are registering terms of sygus types with this sampler */
194 : : bool d_use_sygus_type;
195 : : /**
196 : : * For each (sygus) type, a map from builtin terms to the sygus term they
197 : : * correspond to.
198 : : */
199 : : std::map<TypeNode, std::map<Node, Node> > d_builtin_to_sygus;
200 : : /** all variables we are sampling values for */
201 : : std::vector<Node> d_vars;
202 : : /** type variables
203 : : *
204 : : * We group variables according to "type ids". Two variables have the same
205 : : * type id if they have indistinguishable status according to this sampler.
206 : : * This is a finer-grained grouping than types. For example, two variables
207 : : * of the same type may have different type ids if they occur as constructors
208 : : * of a different set of sygus types in the grammar we are considering.
209 : : * For instance, we assign x and y different type ids in this grammar:
210 : : * A -> B + C
211 : : * B -> x | 0 | 1
212 : : * C -> y
213 : : * Type ids are computed for each variable in d_vars during initialize(...).
214 : : *
215 : : * For each type id, a list of variables in the grammar we are considering,
216 : : * for that type. These typically correspond to the arguments of the
217 : : * function-to-synthesize whose grammar we are considering.
218 : : */
219 : : std::map<unsigned, std::vector<Node> > d_type_vars;
220 : : /**
221 : : * A map all variables in the grammar we are considering to their index in
222 : : * d_type_vars.
223 : : */
224 : : std::map<Node, unsigned> d_var_index;
225 : : /** Map from variables to the id (the domain of d_type_vars). */
226 : : std::map<Node, unsigned> d_type_ids;
227 : : /** constants
228 : : *
229 : : * For each type, a list of constants in the grammar we are considering, for
230 : : * that type.
231 : : */
232 : : std::map<TypeNode, std::vector<Node> > d_type_consts;
233 : : /** a lazy trie for each type
234 : : *
235 : : * This stores the evaluation of all terms registered to this class.
236 : : *
237 : : * Notice if we are registering sygus terms with this class, then terms
238 : : * are grouped into this trie according to their sygus type, and not their
239 : : * builtin type. For example, for grammar:
240 : : * A -> x | B+1
241 : : * B -> x | 0 | 1 | B+B
242 : : * If we register C^B_+( C^B_x(), C^B_0() ) and C^A_x() with this class,
243 : : * then x+0 is registered to d_trie[B] and x is registered to d_trie[A],
244 : : * and no rewrite rule is reported. The reason for this is that otherwise
245 : : * we would end up reporting many useless rewrites since the same builtin
246 : : * term can be generated by multiple sygus types (e.g. C^B_x() and C^A_x()).
247 : : */
248 : : std::map<TypeNode, LazyTrie> d_trie;
249 : : /** is this sampler valid?
250 : : *
251 : : * A sampler can be invalid if sample points cannot be generated for a type
252 : : * of an argument to function f.
253 : : */
254 : : bool d_is_valid;
255 : : /** initialize samples
256 : : *
257 : : * Adds nsamples sample points to d_samples.
258 : : */
259 : : void initializeSamples(unsigned nsamples);
260 : : /** get random value for a type
261 : : *
262 : : * Returns a random value for the given type based on the random number
263 : : * generator. Currently, supported types:
264 : : *
265 : : * Bool, Bitvector : returns a random value in the range of that type.
266 : : * Int, String : returns a random string of values in (base 10) of random
267 : : * length, currently by a repeated coin flip.
268 : : * Real : returns the division of two random integers, where the denominator
269 : : * is omitted if it is zero.
270 : : */
271 : : Node getRandomValue(TypeNode tn);
272 : : /** get sygus random value
273 : : *
274 : : * Returns a random value based on the sygus type tn. The return value is
275 : : * a constant in the analog type of tn. This function chooses either to
276 : : * return a random value, or otherwise will construct a constant based on
277 : : * a random constructor of tn whose builtin operator is not a variable.
278 : : *
279 : : * rchance: the chance that the call to this function will be forbidden
280 : : * from making recursive calls and instead must return a value based on
281 : : * a nullary constructor of tn or based on getRandomValue above.
282 : : * rinc: the percentage to increment rchance on recursive calls.
283 : : *
284 : : * For example, consider the grammar:
285 : : * A -> x | y | 0 | 1 | +( A, A ) | ite( B, A, A )
286 : : * B -> A = A
287 : : * If we call this function on A and rchance is 0.0, there are five evenly
288 : : * chosen possibilities, either we return a random value via getRandomValue
289 : : * above, or we choose one of the four non-variable constructors of A.
290 : : * Say we choose ite, then we recursively call this function for
291 : : * B, A, and A, which return constants c1, c2, and c3. Then, this function
292 : : * returns the rewritten form of ite( c1, c2, c3 ).
293 : : * If on the other hand, rchance was 0.5 and rand() < 0.5. Then, we force
294 : : * this call to terminate by either selecting a random value via
295 : : * getRandomValue, 0 or 1.
296 : : */
297 : : Node getSygusRandomValue(TypeNode tn,
298 : : double rchance,
299 : : double rinc,
300 : : unsigned depth = 0);
301 : : /** map from sygus types to non-variable constructors */
302 : : std::map<TypeNode, std::vector<unsigned> > d_rvalue_cindices;
303 : : /** map from sygus types to non-variable nullary constructors */
304 : : std::map<TypeNode, std::vector<unsigned> > d_rvalue_null_cindices;
305 : : /** the random string alphabet */
306 : : std::vector<unsigned> d_rstring_alphabet;
307 : : /** map from variables to sygus types that include them */
308 : : std::map<Node, std::vector<TypeNode> > d_var_sygus_types;
309 : : /** map from constants to sygus types that include them */
310 : : std::map<Node, std::vector<TypeNode> > d_const_sygus_types;
311 : : /** register sygus type, initializes the above two data structures */
312 : : void registerSygusType(TypeNode tn);
313 : : };
314 : :
315 : : } // namespace quantifiers
316 : : } // namespace theory
317 : : } // namespace cvc5::internal
318 : :
319 : : #endif /* CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H */
|