Branch data Line data Source code
1 : : /******************************************************************************
2 : : * This file is part of the cvc5 project.
3 : : *
4 : : * Copyright (c) 2009-2026 by the authors listed in the file AUTHORS
5 : : * in the top-level source directory and their institutional affiliations.
6 : : * All rights reserved. See the file COPYING in the top-level source
7 : : * directory for licensing information.
8 : : * ****************************************************************************
9 : : *
10 : : * sygus_sampler
11 : : */
12 : :
13 : : #include "cvc5_private.h"
14 : :
15 : : #ifndef CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
16 : : #define CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
17 : :
18 : : #include <map>
19 : :
20 : : #include "smt/env_obj.h"
21 : : #include "theory/quantifiers/lazy_trie.h"
22 : : #include "theory/quantifiers/term_enumeration.h"
23 : :
24 : : namespace cvc5::internal {
25 : :
26 : : class Env;
27 : :
28 : : namespace theory {
29 : : namespace quantifiers {
30 : :
31 : : class TermDbSygus;
32 : :
33 : : /** SygusSampler
34 : : *
35 : : * This class can be used to test whether two expressions are equivalent
36 : : * by random sampling. We use this class for the following options:
37 : : *
38 : : * sygus-rr-synth: synthesize candidate rewrite rules by finding two terms
39 : : * t1 and t2 do not rewrite to the same term, but are identical when considering
40 : : * a set of sample points, and
41 : : *
42 : : * sygus-rr-verify: detect unsound rewrite rules by finding two terms t1 and
43 : : * t2 that rewrite to the same term, but not identical when considering a set
44 : : * of sample points.
45 : : *
46 : : * To use this class:
47 : : * (1) Call initialize( tds, f, nsamples) where f is sygus datatype term.
48 : : * (2) For terms n1....nm enumerated that correspond to builtin analog of sygus
49 : : * term f, we call registerTerm( n1 )....registerTerm( nm ). It may be the case
50 : : * that registerTerm( ni ) returns nj for some j < i. In this case, we have that
51 : : * ni and nj are equivalent under the sample points in this class.
52 : : *
53 : : * For example, say the grammar for f is:
54 : : * A = 0 | 1 | x | y | A+A | ite( B, A, A )
55 : : * B = A <= A
56 : : * If we call initialize( tds, f, 5 ), this class will generate 5 random sample
57 : : * points for (x,y), say (0,0), (1,1), (0,1), (1,0), (2,2). The return values
58 : : * of successive calls to registerTerm are listed below.
59 : : * registerTerm( 0 ) -> 0
60 : : * registerTerm( x ) -> x
61 : : * registerTerm( x+y ) -> x+y
62 : : * registerTerm( y+x ) -> x+y
63 : : * registerTerm( x+ite(x <= 1+1, 0, y ) ) -> x
64 : : * Notice that the number of sample points can be configured for the above
65 : : * options using sygus-samples=N.
66 : : */
67 : : class SygusSampler : protected EnvObj, public LazyTrieEvaluator
68 : : {
69 : : public:
70 : : SygusSampler(Env& env);
71 : 18613 : ~SygusSampler() override {}
72 : :
73 : : /** initialize
74 : : *
75 : : * tn : the return type of terms we will be testing with this class,
76 : : * vars : the variables we are testing substitutions for,
77 : : * nsamples : number of sample points this class will test,
78 : : * unique_type_ids : if this is set to true, then we consider each variable
79 : : * in vars to have a unique "type id". A type id is a finer-grained notion of
80 : : * type that is used to determine when a rewrite rule is redundant.
81 : : */
82 : : virtual void initialize(TypeNode tn,
83 : : const std::vector<Node>& vars,
84 : : unsigned nsamples,
85 : : bool unique_type_ids = false);
86 : : /** initialize sygus
87 : : *
88 : : * qe : pointer to quantifiers engine,
89 : : * f : a term of some SyGuS datatype type whose values we will be
90 : : * testing under the free variables in the grammar of f,
91 : : * nsamples : number of sample points this class will test,
92 : : * useSygusType : whether we will register terms with this sampler that have
93 : : * the same type as f. If this flag is false, then we will be registering
94 : : * terms of the analog of the type of f, that is, the builtin type that
95 : : * f's type encodes in the deep embedding.
96 : : */
97 : : virtual void initializeSygus(TypeNode ftn, unsigned nsamples);
98 : : /** register term n with this sampler database
99 : : *
100 : : * forceKeep is whether we wish to force that n is chosen as a representative
101 : : * value in the trie.
102 : : */
103 : : virtual Node registerTerm(Node n, bool forceKeep = false);
104 : : /** get number of sample points */
105 : 921 : unsigned getNumSamplePoints() const { return d_samples.size(); }
106 : : /** get variables, adds d_vars to vars */
107 : : void getVariables(std::vector<Node>& vars) const;
108 : : /** get sample point
109 : : *
110 : : * Appends sample point #index to the vector pt, d_vars to vars.
111 : : */
112 : : const std::vector<Node>& getSamplePoint(size_t index) const;
113 : : /** Add pt to the set of sample points considered by this sampler */
114 : : void addSamplePoint(const std::vector<Node>& pt);
115 : : /** evaluate n on sample point index */
116 : : Node evaluate(Node n, unsigned index) override;
117 : : /**
118 : : * Compute the variables from the domain of d_var_index that occur in n,
119 : : * store these in the vector fvs.
120 : : */
121 : : void computeFreeVariables(Node n, std::vector<Node>& fvs);
122 : : /**
123 : : * Returns the index of a sample point such that the evaluation of a and b
124 : : * diverge, or -1 if no such sample point exists.
125 : : */
126 : : int getDiffSamplePointIndex(Node a, Node b);
127 : :
128 : : //--------------------------queries about terms
129 : : /** is contiguous
130 : : *
131 : : * This returns whether n's free variables (terms occurring in the range of
132 : : * d_type_vars) are a prefix of the list of variables in d_type_vars for each
133 : : * type id. For instance, if d_type_vars[id] = { x, y } for some id, then
134 : : * 0, x, x+y, y+x are contiguous but y is not. This is useful for excluding
135 : : * terms from consideration that are alpha-equivalent to others.
136 : : */
137 : : bool isContiguous(Node n);
138 : : /** is ordered
139 : : *
140 : : * This returns whether n's free variables are in order with respect to
141 : : * variables in d_type_vars for each type. For instance, if
142 : : * d_type_vars[id] = { x, y } for some id, then 0, x, x+y are ordered but
143 : : * y and y+x are not.
144 : : */
145 : : bool isOrdered(Node n);
146 : : /** is linear
147 : : *
148 : : * This returns whether n contains at most one occurrence of each free
149 : : * variable. For example, x, x+y are linear, but x+x, (x-y)+y, (x+0)+(x+0) are
150 : : * non-linear.
151 : : */
152 : : bool isLinear(Node n);
153 : : /** check variables
154 : : *
155 : : * This returns false if !isOrdered(n) and checkOrder is true or !isLinear(n)
156 : : * if checkLinear is true, or false otherwise.
157 : : */
158 : : bool checkVariables(Node n, bool checkOrder, bool checkLinear);
159 : : /** contains free variables
160 : : *
161 : : * Returns true if the free variables of b are a subset of those in a, where
162 : : * we require a strict subset if strict is true. Free variables are those that
163 : : * occur in the range d_type_vars.
164 : : */
165 : : bool containsFreeVariables(Node a, Node b, bool strict = false);
166 : : //--------------------------end queries about terms
167 : :
168 : : protected:
169 : : /** term enumerator object (used for random sampling) */
170 : : TermEnumeration d_tenum;
171 : : /** samples */
172 : : std::vector<std::vector<Node> > d_samples;
173 : : /** data structure to check duplication of sample points */
174 : : class PtTrie
175 : : {
176 : : public:
177 : : /** add pt to this trie, returns true if pt is not a duplicate. */
178 : : bool add(std::vector<Node>& pt);
179 : :
180 : : private:
181 : : /** the children of this node */
182 : : std::map<Node, PtTrie> d_children;
183 : : };
184 : : /** a trie for samples */
185 : : PtTrie d_samples_trie;
186 : : /** the sygus type for this sampler (if applicable). */
187 : : TypeNode d_ftn;
188 : : /** whether we are registering terms of sygus types with this sampler */
189 : : bool d_use_sygus_type;
190 : : /**
191 : : * For each (sygus) type, a map from builtin terms to the sygus term they
192 : : * correspond to.
193 : : */
194 : : std::map<TypeNode, std::map<Node, Node> > d_builtin_to_sygus;
195 : : /** all variables we are sampling values for */
196 : : std::vector<Node> d_vars;
197 : : /** type variables
198 : : *
199 : : * We group variables according to "type ids". Two variables have the same
200 : : * type id if they have indistinguishable status according to this sampler.
201 : : * This is a finer-grained grouping than types. For example, two variables
202 : : * of the same type may have different type ids if they occur as constructors
203 : : * of a different set of sygus types in the grammar we are considering.
204 : : * For instance, we assign x and y different type ids in this grammar:
205 : : * A -> B + C
206 : : * B -> x | 0 | 1
207 : : * C -> y
208 : : * Type ids are computed for each variable in d_vars during initialize(...).
209 : : *
210 : : * For each type id, a list of variables in the grammar we are considering,
211 : : * for that type. These typically correspond to the arguments of the
212 : : * function-to-synthesize whose grammar we are considering.
213 : : */
214 : : std::map<unsigned, std::vector<Node> > d_type_vars;
215 : : /**
216 : : * A map all variables in the grammar we are considering to their index in
217 : : * d_type_vars.
218 : : */
219 : : std::map<Node, unsigned> d_var_index;
220 : : /** Map from variables to the id (the domain of d_type_vars). */
221 : : std::map<Node, unsigned> d_type_ids;
222 : : /** constants
223 : : *
224 : : * For each type, a list of constants in the grammar we are considering, for
225 : : * that type.
226 : : */
227 : : std::map<TypeNode, std::vector<Node> > d_type_consts;
228 : : /** a lazy trie for each type
229 : : *
230 : : * This stores the evaluation of all terms registered to this class.
231 : : *
232 : : * Notice if we are registering sygus terms with this class, then terms
233 : : * are grouped into this trie according to their sygus type, and not their
234 : : * builtin type. For example, for grammar:
235 : : * A -> x | B+1
236 : : * B -> x | 0 | 1 | B+B
237 : : * If we register C^B_+( C^B_x(), C^B_0() ) and C^A_x() with this class,
238 : : * then x+0 is registered to d_trie[B] and x is registered to d_trie[A],
239 : : * and no rewrite rule is reported. The reason for this is that otherwise
240 : : * we would end up reporting many useless rewrites since the same builtin
241 : : * term can be generated by multiple sygus types (e.g. C^B_x() and C^A_x()).
242 : : */
243 : : std::map<TypeNode, LazyTrie> d_trie;
244 : : /** is this sampler valid?
245 : : *
246 : : * A sampler can be invalid if sample points cannot be generated for a type
247 : : * of an argument to function f.
248 : : */
249 : : bool d_is_valid;
250 : : /** initialize samples
251 : : *
252 : : * Adds nsamples sample points to d_samples.
253 : : */
254 : : void initializeSamples(unsigned nsamples);
255 : : /** get random value for a type
256 : : *
257 : : * Returns a random value for the given type based on the random number
258 : : * generator. Currently, supported types:
259 : : *
260 : : * Bool, Bitvector : returns a random value in the range of that type.
261 : : * Int, String : returns a random string of values in (base 10) of random
262 : : * length, currently by a repeated coin flip.
263 : : * Real : returns the division of two random integers, where the denominator
264 : : * is omitted if it is zero.
265 : : */
266 : : Node getRandomValue(TypeNode tn);
267 : : /** get sygus random value
268 : : *
269 : : * Returns a random value based on the sygus type tn. The return value is
270 : : * a constant in the analog type of tn. This function chooses either to
271 : : * return a random value, or otherwise will construct a constant based on
272 : : * a random constructor of tn whose builtin operator is not a variable.
273 : : *
274 : : * rchance: the chance that the call to this function will be forbidden
275 : : * from making recursive calls and instead must return a value based on
276 : : * a nullary constructor of tn or based on getRandomValue above.
277 : : * rinc: the percentage to increment rchance on recursive calls.
278 : : *
279 : : * For example, consider the grammar:
280 : : * A -> x | y | 0 | 1 | +( A, A ) | ite( B, A, A )
281 : : * B -> A = A
282 : : * If we call this function on A and rchance is 0.0, there are five evenly
283 : : * chosen possibilities, either we return a random value via getRandomValue
284 : : * above, or we choose one of the four non-variable constructors of A.
285 : : * Say we choose ite, then we recursively call this function for
286 : : * B, A, and A, which return constants c1, c2, and c3. Then, this function
287 : : * returns the rewritten form of ite( c1, c2, c3 ).
288 : : * If on the other hand, rchance was 0.5 and rand() < 0.5. Then, we force
289 : : * this call to terminate by either selecting a random value via
290 : : * getRandomValue, 0 or 1.
291 : : */
292 : : Node getSygusRandomValue(TypeNode tn,
293 : : double rchance,
294 : : double rinc,
295 : : unsigned depth = 0);
296 : : /** map from sygus types to non-variable constructors */
297 : : std::map<TypeNode, std::vector<unsigned> > d_rvalue_cindices;
298 : : /** map from sygus types to non-variable nullary constructors */
299 : : std::map<TypeNode, std::vector<unsigned> > d_rvalue_null_cindices;
300 : : /** the random string alphabet */
301 : : std::vector<unsigned> d_rstring_alphabet;
302 : : /** map from variables to sygus types that include them */
303 : : std::map<Node, std::vector<TypeNode> > d_var_sygus_types;
304 : : /** map from constants to sygus types that include them */
305 : : std::map<Node, std::vector<TypeNode> > d_const_sygus_types;
306 : : /** register sygus type, initializes the above two data structures */
307 : : void registerSygusType(TypeNode tn);
308 : : };
309 : :
310 : : } // namespace quantifiers
311 : : } // namespace theory
312 : : } // namespace cvc5::internal
313 : :
314 : : #endif /* CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H */
|