LCOV - coverage.info - /buildbot/coverage/build/src/theory/strings/regexp

LCOV - code coverage report

Current view:	top level - buildbot/coverage/build/src/theory/strings - regexp_elim.cpp (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	322	350	92.0 %
Date:	2026-05-08 10:22:53	Functions:	7	7	100.0 %
		Branches:	188	278	67.6 %

           Branch data     Line data    Source code

       1                 :            : /******************************************************************************
       2                 :            :  * This file is part of the cvc5 project.
       3                 :            :  *
       4                 :            :  * Copyright (c) 2009-2026 by the authors listed in the file AUTHORS
       5                 :            :  * in the top-level source directory and their institutional affiliations.
       6                 :            :  * All rights reserved.  See the file COPYING in the top-level source
       7                 :            :  * directory for licensing information.
       8                 :            :  * ****************************************************************************
       9                 :            :  *
      10                 :            :  * Implementation of techniques for eliminating regular expressions.
      11                 :            :  */
      12                 :            : 
      13                 :            : #include "theory/strings/regexp_elim.h"
      14                 :            : 
      15                 :            : #include "expr/bound_var_manager.h"
      16                 :            : #include "options/strings_options.h"
      17                 :            : #include "proof/proof_node_manager.h"
      18                 :            : #include "smt/env.h"
      19                 :            : #include "theory/rewriter.h"
      20                 :            : #include "theory/strings/regexp_entail.h"
      21                 :            : #include "theory/strings/theory_strings_utils.h"
      22                 :            : #include "theory/strings/word.h"
      23                 :            : #include "util/rational.h"
      24                 :            : #include "util/string.h"
      25                 :            : 
      26                 :            : using namespace cvc5::internal::kind;
      27                 :            : 
      28                 :            : namespace cvc5::internal {
      29                 :            : namespace theory {
      30                 :            : namespace strings {
      31                 :            : 
      32                 :      51092 : RegExpElimination::RegExpElimination(Env& env, bool isAgg, context::Context* c)
      33                 :            :     : EnvObj(env),
      34                 :      51092 :       d_isAggressive(isAgg),
      35                 :      61877 :       d_epg(!env.isTheoryProofProducing()
      36                 :            :                 ? nullptr
      37                 :      61877 :                 : new EagerProofGenerator(env, c, "RegExpElimination::epg"))
      38                 :            : {
      39                 :      51092 : }
      40                 :            : 
      41                 :        264 : Node RegExpElimination::eliminate(Node atom, bool isAgg)
      42                 :            : {
      43 [ -  + ][ -  + ]:        264 :   Assert(atom.getKind() == Kind::STRING_IN_REGEXP);
                 [ -  - ]
      44         [ +  + ]:        264 :   if (atom[1].getKind() == Kind::REGEXP_CONCAT)
      45                 :            :   {
      46                 :        158 :     return eliminateConcat(atom, isAgg);
      47                 :            :   }
      48         [ +  - ]:        106 :   else if (atom[1].getKind() == Kind::REGEXP_STAR)
      49                 :            :   {
      50                 :        106 :     return eliminateStar(atom, isAgg);
      51                 :            :   }
      52                 :          0 :   return Node::null();
      53                 :            : }
      54                 :            : 
      55                 :        264 : TrustNode RegExpElimination::eliminateTrusted(Node atom)
      56                 :            : {
      57                 :        264 :   Node eatom = eliminate(atom, d_isAggressive);
      58         [ +  + ]:        264 :   if (!eatom.isNull())
      59                 :            :   {
      60         [ +  + ]:        196 :     if (isProofEnabled())
      61                 :            :     {
      62                 :         88 :       ProofNodeManager* pnm = d_env.getProofNodeManager();
      63                 :         88 :       Node eq = atom.eqNode(eatom);
      64                 :            :       std::shared_ptr<ProofNode> pn =
      65                 :        176 :           pnm->mkTrustedNode(TrustId::RE_ELIM, {}, {}, eq);
      66                 :         88 :       d_epg->setProofFor(eq, pn);
      67         [ +  - ]:         88 :       return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
      68                 :         88 :     }
      69                 :        108 :     return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
      70                 :            :   }
      71                 :         68 :   return TrustNode::null();
      72                 :        264 : }
      73                 :            : 
      74                 :        158 : Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
      75                 :            : {
      76                 :        158 :   NodeManager* nm = atom.getNodeManager();
      77                 :        158 :   BoundVarManager* bvm = nm->getBoundVarManager();
      78                 :        158 :   Node x = atom[0];
      79                 :        158 :   Node lenx = nm->mkNode(Kind::STRING_LENGTH, x);
      80                 :        158 :   Node re = atom[1];
      81                 :        158 :   Node zero = nm->mkConstInt(Rational(0));
      82                 :        158 :   std::vector<Node> children;
      83                 :        158 :   utils::getConcat(re, children);
      84                 :            : 
      85                 :            :   // If it can be reduced to memberships in fixed length regular expressions.
      86                 :            :   // This includes concatenations where at most one child is of the form
      87                 :            :   // (re.* re.allchar), which we abbreviate _* below, and all other children
      88                 :            :   // have a fixed length.
      89                 :            :   // The intuition why this is a "non-aggressive" rewrite is that membership
      90                 :            :   // into fixed length regular expressions are easy to handle.
      91                 :            :   // the index of _* in re
      92                 :        158 :   unsigned pivotIndex = 0;
      93                 :        158 :   bool hasPivotIndex = false;
      94                 :        158 :   bool hasFixedLength = true;
      95                 :        158 :   std::vector<Node> childLengths;
      96                 :        158 :   std::vector<Node> childLengthsPostPivot;
      97         [ +  + ]:        705 :   for (unsigned i = 0, size = children.size(); i < size; i++)
      98                 :            :   {
      99                 :        547 :     Node c = children[i];
     100                 :        547 :     Node fl = RegExpEntail::getFixedLengthForRegexp(c);
     101         [ +  + ]:        547 :     if (fl.isNull())
     102                 :            :     {
     103         [ +  + ]:        170 :       if (!hasPivotIndex && c.getKind() == Kind::REGEXP_STAR
     104 [ +  + ][ +  + ]:        368 :           && c[0].getKind() == Kind::REGEXP_ALLCHAR)
         [ +  + ][ +  + ]
                 [ -  - ]
     105                 :            :       {
     106                 :         25 :         hasPivotIndex = true;
     107                 :         25 :         pivotIndex = i;
     108                 :            :         // zero is used in sum below and is used for concat-fixed-len
     109                 :         25 :         fl = zero;
     110                 :            :       }
     111                 :            :       else
     112                 :            :       {
     113                 :        173 :         hasFixedLength = false;
     114                 :            :       }
     115                 :            :     }
     116         [ +  + ]:        547 :     if (!fl.isNull())
     117                 :            :     {
     118                 :        374 :       childLengths.push_back(fl);
     119         [ +  + ]:        374 :       if (hasPivotIndex)
     120                 :            :       {
     121                 :        145 :         childLengthsPostPivot.push_back(fl);
     122                 :            :       }
     123                 :            :     }
     124                 :        547 :   }
     125                 :        158 :   Node lenSum = childLengths.size() > 1
     126                 :            :                     ? nm->mkNode(Kind::ADD, childLengths)
     127 [ +  + ][ +  + ]:        158 :                     : (childLengths.empty() ? zero : childLengths[0]);
     128                 :            :   // if we have a fixed length
     129         [ +  + ]:        158 :   if (hasFixedLength)
     130                 :            :   {
     131 [ -  + ][ -  + ]:         29 :     Assert(re.getNumChildren() == children.size());
                 [ -  - ]
     132                 :         29 :     std::vector<Node> conc;
     133                 :         29 :     conc.push_back(
     134         [ +  + ]:         58 :         nm->mkNode(hasPivotIndex ? Kind::GEQ : Kind::EQUAL, lenx, lenSum));
     135                 :         29 :     Node currEnd = zero;
     136         [ +  + ]:        202 :     for (unsigned i = 0, size = childLengths.size(); i < size; i++)
     137                 :            :     {
     138 [ +  + ][ +  + ]:        173 :       if (hasPivotIndex && i == pivotIndex)
     139                 :            :       {
     140                 :          4 :         Node ppSum = childLengthsPostPivot.size() == 1
     141                 :          0 :                          ? childLengthsPostPivot[0]
     142         [ -  + ]:          4 :                          : nm->mkNode(Kind::ADD, childLengthsPostPivot);
     143                 :          4 :         currEnd = nm->mkNode(Kind::SUB, lenx, ppSum);
     144                 :          4 :       }
     145                 :            :       else
     146                 :            :       {
     147                 :            :         Node curr =
     148                 :        338 :             nm->mkNode(Kind::STRING_SUBSTR, x, currEnd, childLengths[i]);
     149                 :            :         // We do not need to include memberships of the form
     150                 :            :         //   (str.substr x n 1) in re.allchar
     151                 :            :         // since we know that by construction, n < len( x ).
     152         [ +  + ]:        169 :         if (re[i].getKind() != Kind::REGEXP_ALLCHAR)
     153                 :            :         {
     154                 :        184 :           Node currMem = nm->mkNode(Kind::STRING_IN_REGEXP, curr, re[i]);
     155                 :         92 :           conc.push_back(currMem);
     156                 :         92 :         }
     157                 :        169 :         currEnd = nm->mkNode(Kind::ADD, currEnd, childLengths[i]);
     158                 :        169 :       }
     159                 :            :     }
     160                 :         29 :     Node res = nm->mkNode(Kind::AND, conc);
     161                 :            :     // For example:
     162                 :            :     //   x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
     163                 :            :     //   len( x ) = 3 ^
     164                 :            :     //   substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
     165                 :            :     //   substr(x,1,2) in "AB"
     166                 :            :     // An example with a pivot index:
     167                 :            :     //   x in re.++( "AB" ++ _* ++ "C" ) -->
     168                 :            :     //   len( x ) >= 3 ^
     169                 :            :     //   substr( x, 0, 2 ) in "AB" ^
     170                 :            :     //   substr( x, len( x ) - 1, 1 ) in "C"
     171                 :         29 :     return returnElim(atom, res, "concat-fixed-len");
     172                 :         29 :   }
     173                 :            : 
     174                 :            :   // memberships of the form x in re.++ * s1 * ... * sn *, where * are
     175                 :            :   // any number of repetitions (exact or indefinite) of re.allchar.
     176         [ +  - ]:        129 :   Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
     177                 :        129 :   bool onlySigmasAndConsts = true;
     178                 :        129 :   std::vector<Node> sep_children;
     179                 :        129 :   std::vector<unsigned> gap_minsize;
     180                 :        129 :   std::vector<bool> gap_exact;
     181                 :            :   // the first gap is initially strict zero
     182                 :        129 :   gap_minsize.push_back(0);
     183                 :        129 :   gap_exact.push_back(true);
     184         [ +  + ]:        310 :   for (const Node& c : children)
     185                 :            :   {
     186         [ +  - ]:        289 :     Trace("re-elim-debug") << "  " << c << std::endl;
     187                 :        289 :     onlySigmasAndConsts = false;
     188         [ +  + ]:        289 :     if (c.getKind() == Kind::STRING_TO_REGEXP)
     189                 :            :     {
     190                 :        125 :       onlySigmasAndConsts = true;
     191                 :        125 :       sep_children.push_back(c[0]);
     192                 :            :       // the next gap is initially strict zero
     193                 :        125 :       gap_minsize.push_back(0);
     194                 :        125 :       gap_exact.push_back(true);
     195                 :            :     }
     196                 :        492 :     else if (c.getKind() == Kind::REGEXP_STAR
     197 [ +  + ][ +  + ]:        164 :              && c[0].getKind() == Kind::REGEXP_ALLCHAR)
         [ +  + ][ +  + ]
                 [ -  - ]
     198                 :            :     {
     199                 :            :       // found a gap of any size
     200                 :         49 :       onlySigmasAndConsts = true;
     201                 :         49 :       gap_exact[gap_exact.size() - 1] = false;
     202                 :            :     }
     203         [ +  + ]:        115 :     else if (c.getKind() == Kind::REGEXP_ALLCHAR)
     204                 :            :     {
     205                 :            :       // add one to the minimum size of the gap
     206                 :          7 :       onlySigmasAndConsts = true;
     207                 :          7 :       gap_minsize[gap_minsize.size() - 1]++;
     208                 :            :     }
     209         [ +  + ]:        289 :     if (!onlySigmasAndConsts)
     210                 :            :     {
     211         [ +  - ]:        108 :       Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
     212                 :        108 :       break;
     213                 :            :     }
     214                 :            :   }
     215                 :            :   // we should always rewrite concatenations that are purely re.allchar
     216                 :            :   // and re.*( re.allchar ).
     217 [ +  + ][ +  - ]:        129 :   Assert(!onlySigmasAndConsts || !sep_children.empty());
         [ -  + ][ -  + ]
                 [ -  - ]
     218 [ +  + ][ +  - ]:        129 :   if (onlySigmasAndConsts && !sep_children.empty())
                 [ +  + ]
     219                 :            :   {
     220                 :         21 :     bool canProcess = true;
     221                 :         21 :     std::vector<Node> conj;
     222                 :            :     // The following constructs a set of constraints that encodes that a
     223                 :            :     // set of string terms are found, in order, in string x.
     224                 :            :     // prev_end stores the current (symbolic) index in x that we are
     225                 :            :     // searching.
     226                 :         21 :     Node prev_end = zero;
     227                 :            :     // the symbolic index we start searching, for each child in sep_children.
     228                 :         21 :     std::vector<Node> prev_ends;
     229                 :         21 :     unsigned gap_minsize_end = gap_minsize.back();
     230                 :         21 :     bool gap_exact_end = gap_exact.back();
     231                 :         21 :     std::vector<Node> non_greedy_find_vars;
     232         [ +  + ]:         84 :     for (unsigned i = 0, size = sep_children.size(); i < size; i++)
     233                 :            :     {
     234         [ +  + ]:         63 :       if (gap_minsize[i] > 0)
     235                 :            :       {
     236                 :            :         // the gap to this child is at least gap_minsize[i]
     237                 :         14 :         prev_end = nm->mkNode(
     238                 :         21 :             Kind::ADD, prev_end, nm->mkConstInt(Rational(gap_minsize[i])));
     239                 :            :       }
     240                 :         63 :       prev_ends.push_back(prev_end);
     241                 :         63 :       Node sc = sep_children[i];
     242                 :         63 :       Node lensc = nm->mkNode(Kind::STRING_LENGTH, sc);
     243         [ +  + ]:         63 :       if (gap_exact[i])
     244                 :            :       {
     245                 :            :         // if the gap is exact, it is a substring constraint
     246                 :         21 :         Node curr = prev_end;
     247                 :         42 :         Node ss = nm->mkNode(Kind::STRING_SUBSTR, x, curr, lensc);
     248                 :         21 :         conj.push_back(ss.eqNode(sc));
     249                 :         21 :         prev_end = nm->mkNode(Kind::ADD, curr, lensc);
     250                 :         21 :       }
     251                 :            :       else
     252                 :            :       {
     253                 :            :         // otherwise, we can use indexof to represent some next occurrence
     254 [ +  + ][ -  + ]:         42 :         if (gap_exact[i + 1] && i + 1 != size)
                 [ -  + ]
     255                 :            :         {
     256         [ -  - ]:          0 :           if (!isAgg)
     257                 :            :           {
     258                 :          0 :             canProcess = false;
     259                 :          0 :             break;
     260                 :            :           }
     261                 :            :           // if the gap after this one is strict, we need a non-greedy find
     262                 :            :           // thus, we add a symbolic constant
     263                 :            :           Node cacheVal =
     264                 :          0 :               BoundVarManager::getCacheValue(atom, nm->mkConstInt(Rational(i)));
     265                 :          0 :           TypeNode intType = nm->integerType();
     266                 :            :           Node k = bvm->mkBoundVar(
     267                 :          0 :               BoundVarId::STRINGS_RE_ELIM_CONCAT_INDEX, cacheVal, intType);
     268                 :          0 :           non_greedy_find_vars.push_back(k);
     269                 :          0 :           prev_end = nm->mkNode(Kind::ADD, prev_end, k);
     270                 :          0 :         }
     271                 :         84 :         Node curr = nm->mkNode(Kind::STRING_INDEXOF, x, sc, prev_end);
     272                 :         84 :         Node idofFind = curr.eqNode(nm->mkConstInt(Rational(-1))).negate();
     273                 :         42 :         conj.push_back(idofFind);
     274                 :         42 :         prev_end = nm->mkNode(Kind::ADD, curr, lensc);
     275                 :         42 :       }
     276 [ +  - ][ +  - ]:         63 :     }
     277                 :            : 
     278         [ +  - ]:         21 :     if (canProcess)
     279                 :            :     {
     280                 :            :       // since sep_children is non-empty, conj is non-empty
     281 [ -  + ][ -  + ]:         21 :       Assert(!conj.empty());
                 [ -  - ]
     282                 :            :       // Process the last gap, if necessary.
     283                 :            :       // Notice that if the last gap is not exact and its minsize is zero,
     284                 :            :       // then the last indexof/substr constraint entails the following
     285                 :            :       // constraint, so it is not necessary to add.
     286                 :            :       // Below, we may write "A" for (str.to.re "A") and _ for re.allchar:
     287                 :         21 :       Node cEnd = nm->mkConstInt(Rational(gap_minsize_end));
     288         [ +  + ]:         21 :       if (gap_exact_end)
     289                 :            :       {
     290 [ -  + ][ -  + ]:         14 :         Assert(!sep_children.empty());
                 [ -  - ]
     291                 :            :         // if it is strict, it corresponds to a substr case.
     292                 :            :         // For example:
     293                 :            :         //     x in (re.++ "A" (re.* _) "B" _ _) --->
     294                 :            :         //        ... ^ "B" = substr( x, len( x ) - 3, 1 )  ^ ...
     295                 :         14 :         Node sc = sep_children.back();
     296                 :         14 :         Node lenSc = nm->mkNode(Kind::STRING_LENGTH, sc);
     297                 :            :         Node loc =
     298                 :         28 :             nm->mkNode(Kind::SUB, lenx, nm->mkNode(Kind::ADD, lenSc, cEnd));
     299                 :         28 :         Node scc = sc.eqNode(nm->mkNode(Kind::STRING_SUBSTR, x, loc, lenSc));
     300                 :            :         // We also must ensure that we fit. This constraint is necessary in
     301                 :            :         // addition to the constraint above. Take this example:
     302                 :            :         //     x in (re.++ "A" _ (re.* _) "B" _) --->
     303                 :            :         //       substr( x, 0, 1 ) = "A" ^             // find "A"
     304                 :            :         //       indexof( x, "B", 2 ) != -1 ^          // find "B" >=1 after "A"
     305                 :            :         //       substr( x, len(x)-2, 1 ) = "B" ^      // "B" is at end - 2.
     306                 :            :         //       indexof( x, "B", 2 ) <= len( x ) - 2
     307                 :            :         // The last constaint ensures that the second and third constraints
     308                 :            :         // may refer to the same "B". If it were not for the last constraint, it
     309                 :            :         // would have been the case than "ABB" would be a model for x, where
     310                 :            :         // the second constraint refers to the third position, and the third
     311                 :            :         // constraint refers to the second position.
     312                 :            :         //
     313                 :            :         // With respect to the above example, the following is an optimization.
     314                 :            :         // For that example, we instead produce:
     315                 :            :         //     x in (re.++ "A" _ (re.* _) "B" _) --->
     316                 :            :         //       substr( x, 0, 1 ) = "A" ^          // find "A"
     317                 :            :         //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
     318                 :            :         //       2 <= len( x ) - 2
     319                 :            :         // The intuition is that above, there are two constraints that insist
     320                 :            :         // that "B" is found, whereas we only need one. The last constraint
     321                 :            :         // above says that the "B" we find at end-2 can be found >=1 after
     322                 :            :         // the "A".
     323                 :         14 :         conj.pop_back();
     324                 :            :         Node fit = nm->mkNode(
     325                 :         28 :             gap_exact[sep_children.size() - 1] ? Kind::EQUAL : Kind::LEQ,
     326                 :         14 :             prev_ends.back(),
     327         [ -  + ]:         56 :             loc);
     328                 :            : 
     329                 :         14 :         conj.push_back(scc);
     330                 :         14 :         conj.push_back(fit);
     331                 :         14 :       }
     332         [ -  + ]:          7 :       else if (gap_minsize_end > 0)
     333                 :            :       {
     334                 :            :         // if it is non-strict, we are in a "greedy find" situtation where
     335                 :            :         // we just need to ensure that the next occurrence fits.
     336                 :            :         // For example:
     337                 :            :         //     x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) --->
     338                 :            :         //        ... ^ indexof( x, "B", 1 ) + 2 <= len( x )
     339                 :            :         Node fit =
     340                 :          0 :             nm->mkNode(Kind::LEQ, nm->mkNode(Kind::ADD, prev_end, cEnd), lenx);
     341                 :          0 :         conj.push_back(fit);
     342                 :          0 :       }
     343                 :         21 :       Node res = nm->mkAnd(conj);
     344                 :            :       // process the non-greedy find variables
     345         [ -  + ]:         21 :       if (!non_greedy_find_vars.empty())
     346                 :            :       {
     347                 :          0 :         std::vector<Node> children2;
     348         [ -  - ]:          0 :         for (const Node& v : non_greedy_find_vars)
     349                 :            :         {
     350                 :          0 :           Node bound = nm->mkNode(
     351                 :            :               Kind::AND,
     352                 :          0 :               {nm->mkNode(Kind::LEQ, zero, v), nm->mkNode(Kind::LT, v, lenx)});
     353                 :          0 :           children2.push_back(bound);
     354                 :          0 :         }
     355                 :          0 :         children2.push_back(res);
     356                 :          0 :         Node body = nm->mkNode(Kind::AND, children2);
     357                 :          0 :         Node bvl = nm->mkNode(Kind::BOUND_VAR_LIST, non_greedy_find_vars);
     358                 :          0 :         res = utils::mkForallInternal(nm, bvl, body.negate()).negate();
     359                 :          0 :       }
     360                 :            :       // must also give a minimum length requirement
     361                 :         21 :       res = nm->mkNode(Kind::AND, res, nm->mkNode(Kind::GEQ, lenx, lenSum));
     362                 :            :       // Examples of this elimination:
     363                 :            :       //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
     364                 :            :       //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
     365                 :            :       //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
     366                 :            :       //     indexof(x,"A",0)!=-1 ^
     367                 :            :       //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
     368                 :            :       //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^
     369                 :            :       //     len(x) >= 7
     370                 :            : 
     371                 :            :       // An example of a non-greedy find:
     372                 :            :       //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
     373                 :            :       //     (exists k. 0 <= k < len( x ) ^
     374                 :            :       //               indexof( x, "A", k ) != -1 ^
     375                 :            :       //               substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^
     376                 :            :       //     len(x) >= 3
     377                 :         21 :       return returnElim(atom, res, "concat-with-gaps");
     378                 :         21 :     }
     379 [ -  + ][ -  + ]:         84 :   }
         [ -  + ][ -  + ]
     380                 :            : 
     381         [ +  + ]:        108 :   if (!isAgg)
     382                 :            :   {
     383                 :         35 :     return Node::null();
     384                 :            :   }
     385                 :            :   // only aggressive rewrites below here
     386                 :            : 
     387                 :            :   // if the first or last child is constant string, we can split the membership
     388                 :            :   // into a conjunction of two memberships.
     389                 :         73 :   Node sStartIndex = zero;
     390                 :         73 :   Node sLength = lenx;
     391                 :         73 :   std::vector<Node> sConstraints;
     392                 :         73 :   std::vector<Node> rexpElimChildren;
     393                 :         73 :   unsigned nchildren = children.size();
     394 [ -  + ][ -  + ]:         73 :   Assert(nchildren > 1);
                 [ -  - ]
     395         [ +  + ]:        219 :   for (unsigned r = 0; r < 2; r++)
     396                 :            :   {
     397         [ +  + ]:        146 :     unsigned index = r == 0 ? 0 : nchildren - 1;
     398                 :        146 :     Node c = children[index];
     399         [ +  + ]:        146 :     if (c.getKind() == Kind::STRING_TO_REGEXP)
     400                 :            :     {
     401 [ +  + ][ -  + ]:         66 :       Assert(children[index + (r == 0 ? 1 : -1)].getKind()
         [ -  + ][ -  - ]
     402                 :            :              != Kind::STRING_TO_REGEXP);
     403                 :         66 :       Node s = c[0];
     404                 :         66 :       Node lens = nm->mkNode(Kind::STRING_LENGTH, s);
     405                 :         77 :       Node sss = r == 0 ? zero : nm->mkNode(Kind::SUB, lenx, lens);
     406                 :        132 :       Node ss = nm->mkNode(Kind::STRING_SUBSTR, x, sss, lens);
     407                 :         66 :       sConstraints.push_back(ss.eqNode(s));
     408         [ +  + ]:         66 :       if (r == 0)
     409                 :            :       {
     410                 :         55 :         sStartIndex = lens;
     411                 :            :       }
     412 [ +  - ][ +  + ]:         11 :       else if (r == 1 && sConstraints.size() == 2)
                 [ +  + ]
     413                 :            :       {
     414                 :            :         // first and last children cannot overlap
     415                 :          4 :         Node bound = nm->mkNode(Kind::GEQ, sss, sStartIndex);
     416                 :          2 :         sConstraints.push_back(bound);
     417                 :          2 :       }
     418                 :         66 :       sLength = nm->mkNode(Kind::SUB, sLength, lens);
     419                 :         66 :     }
     420 [ +  + ][ +  + ]:        146 :     if (r == 1 && !sConstraints.empty())
                 [ +  + ]
     421                 :            :     {
     422                 :            :       // add the middle children
     423         [ +  + ]:         66 :       for (unsigned i = 1; i < (nchildren - 1); i++)
     424                 :            :       {
     425                 :          2 :         rexpElimChildren.push_back(children[i]);
     426                 :            :       }
     427                 :            :     }
     428         [ +  + ]:        146 :     if (c.getKind() != Kind::STRING_TO_REGEXP)
     429                 :            :     {
     430                 :         80 :       rexpElimChildren.push_back(c);
     431                 :            :     }
     432                 :        146 :   }
     433         [ +  + ]:         73 :   if (!sConstraints.empty())
     434                 :            :   {
     435                 :        128 :     Node ss = nm->mkNode(Kind::STRING_SUBSTR, x, sStartIndex, sLength);
     436 [ -  + ][ -  + ]:         64 :     Assert(!rexpElimChildren.empty());
                 [ -  - ]
     437                 :         64 :     Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType());
     438                 :         64 :     sConstraints.push_back(nm->mkNode(Kind::STRING_IN_REGEXP, ss, regElim));
     439                 :         64 :     Node ret = nm->mkNode(Kind::AND, sConstraints);
     440                 :            :     // e.g.
     441                 :            :     // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
     442                 :         64 :     return returnElim(atom, ret, "concat-splice");
     443                 :         64 :   }
     444 [ -  + ][ -  + ]:          9 :   Assert(nchildren > 1);
                 [ -  - ]
     445         [ +  - ]:         18 :   for (unsigned i = 0; i < nchildren; i++)
     446                 :            :   {
     447         [ +  + ]:         18 :     if (children[i].getKind() == Kind::STRING_TO_REGEXP)
     448                 :            :     {
     449                 :          9 :       Node s = children[i][0];
     450                 :          9 :       Node lens = nm->mkNode(Kind::STRING_LENGTH, s);
     451                 :            :       // there exists an index in this string such that the substring is this
     452                 :          9 :       Node k;
     453                 :          9 :       std::vector<Node> echildren;
     454         [ -  + ]:          9 :       if (i == 0)
     455                 :            :       {
     456                 :          0 :         k = zero;
     457                 :            :       }
     458         [ -  + ]:          9 :       else if (i + 1 == nchildren)
     459                 :            :       {
     460                 :          0 :         k = nm->mkNode(Kind::SUB, lenx, lens);
     461                 :            :       }
     462                 :            :       else
     463                 :            :       {
     464                 :            :         Node cacheVal =
     465                 :         18 :             BoundVarManager::getCacheValue(atom, nm->mkConstInt(Rational(i)));
     466                 :          9 :         TypeNode intType = nm->integerType();
     467                 :         18 :         k = bvm->mkBoundVar(
     468                 :          9 :             BoundVarId::STRINGS_RE_ELIM_CONCAT_INDEX, cacheVal, intType);
     469                 :         27 :         Node bound = nm->mkNode(
     470                 :            :             Kind::AND,
     471                 :         18 :             {nm->mkNode(Kind::LEQ, zero, k),
     472                 :         18 :              nm->mkNode(Kind::LEQ, k, nm->mkNode(Kind::SUB, lenx, lens))});
     473                 :          9 :         echildren.push_back(bound);
     474                 :          9 :       }
     475                 :         18 :       Node substrEq = nm->mkNode(Kind::STRING_SUBSTR, x, k, lens).eqNode(s);
     476                 :          9 :       echildren.push_back(substrEq);
     477         [ +  - ]:          9 :       if (i > 0)
     478                 :            :       {
     479                 :          9 :         std::vector<Node> rprefix;
     480                 :          9 :         rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
     481                 :          9 :         Node rpn = utils::mkConcat(rprefix, nm->regExpType());
     482                 :            :         Node substrPrefix =
     483                 :            :             nm->mkNode(Kind::STRING_IN_REGEXP,
     484                 :         18 :                        nm->mkNode(Kind::STRING_SUBSTR, x, zero, k),
     485                 :         27 :                        rpn);
     486                 :          9 :         echildren.push_back(substrPrefix);
     487                 :          9 :       }
     488         [ +  - ]:          9 :       if (i + 1 < nchildren)
     489                 :            :       {
     490                 :          9 :         std::vector<Node> rsuffix;
     491                 :          9 :         rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
     492                 :          9 :         Node rps = utils::mkConcat(rsuffix, nm->regExpType());
     493                 :         18 :         Node ks = nm->mkNode(Kind::ADD, k, lens);
     494                 :            :         Node substrSuffix = nm->mkNode(
     495                 :            :             Kind::STRING_IN_REGEXP,
     496                 :         18 :             nm->mkNode(
     497                 :         18 :                 Kind::STRING_SUBSTR, x, ks, nm->mkNode(Kind::SUB, lenx, ks)),
     498                 :         27 :             rps);
     499                 :          9 :         echildren.push_back(substrSuffix);
     500                 :          9 :       }
     501                 :          9 :       Node body = nm->mkNode(Kind::AND, echildren);
     502         [ +  - ]:          9 :       if (k.getKind() == Kind::BOUND_VARIABLE)
     503                 :            :       {
     504                 :          9 :         Node bvl = nm->mkNode(Kind::BOUND_VAR_LIST, k);
     505                 :          9 :         body = utils::mkForallInternal(nm, bvl, body.negate()).negate();
     506                 :          9 :       }
     507                 :            :       // e.g. x in re.++( R1, "AB", R2 ) --->
     508                 :            :       //  exists k.
     509                 :            :       //    0 <= k <= (len(x)-2) ^
     510                 :            :       //    substr( x, k, 2 ) = "AB" ^
     511                 :            :       //    substr( x, 0, k ) in R1 ^
     512                 :            :       //    substr( x, k+2, len(x)-(k+2) ) in R2
     513                 :          9 :       return returnElim(atom, body, "concat-find");
     514                 :          9 :     }
     515                 :            :   }
     516                 :          0 :   return Node::null();
     517                 :        158 : }
     518                 :            : 
     519                 :        106 : Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
     520                 :            : {
     521         [ +  + ]:        106 :   if (!isAgg)
     522                 :            :   {
     523                 :         20 :     return Node::null();
     524                 :            :   }
     525                 :            :   // only aggressive rewrites below here
     526                 :            : 
     527                 :         86 :   NodeManager* nm = atom.getNodeManager();
     528                 :         86 :   BoundVarManager* bvm = nm->getBoundVarManager();
     529                 :         86 :   Node x = atom[0];
     530                 :         86 :   Node lenx = nm->mkNode(Kind::STRING_LENGTH, x);
     531                 :         86 :   Node re = atom[1];
     532                 :         86 :   Node zero = nm->mkConstInt(Rational(0));
     533                 :            :   // for regular expression star,
     534                 :            :   // if the period is a fixed constant, we can turn it into a bounded
     535                 :            :   // quantifier
     536                 :         86 :   std::vector<Node> disj;
     537         [ +  + ]:         86 :   if (re[0].getKind() == Kind::REGEXP_UNION)
     538                 :            :   {
     539         [ +  + ]:         27 :     for (const Node& r : re[0])
     540                 :            :     {
     541                 :         18 :       disj.push_back(r);
     542                 :         27 :     }
     543                 :            :   }
     544                 :            :   else
     545                 :            :   {
     546                 :         77 :     disj.push_back(re[0]);
     547                 :            :   }
     548                 :         86 :   bool lenOnePeriod = true;
     549                 :         86 :   std::vector<Node> char_constraints;
     550                 :         86 :   TypeNode intType = nm->integerType();
     551                 :            :   Node index =
     552                 :        172 :       bvm->mkBoundVar(BoundVarId::STRINGS_RE_ELIM_STAR_INDEX, atom, intType);
     553                 :            :   Node substr_ch =
     554                 :        172 :       nm->mkNode(Kind::STRING_SUBSTR, x, index, nm->mkConstInt(Rational(1)));
     555                 :            :   // handle the case where it is purely characters
     556         [ +  + ]:        133 :   for (const Node& r : disj)
     557                 :            :   {
     558 [ -  + ][ -  + ]:         95 :     Assert(r.getKind() != Kind::REGEXP_UNION);
                 [ -  - ]
     559 [ -  + ][ -  + ]:         95 :     Assert(r.getKind() != Kind::REGEXP_ALLCHAR);
                 [ -  - ]
     560                 :         95 :     lenOnePeriod = false;
     561                 :            :     // lenOnePeriod is true if this regular expression is a single character
     562                 :            :     // regular expression
     563         [ +  + ]:         95 :     if (r.getKind() == Kind::STRING_TO_REGEXP)
     564                 :            :     {
     565                 :         77 :       Node s = r[0];
     566 [ +  + ][ +  + ]:         77 :       if (s.isConst() && s.getConst<String>().size() == 1)
                 [ +  + ]
     567                 :            :       {
     568                 :         38 :         lenOnePeriod = true;
     569                 :            :       }
     570                 :         77 :     }
     571         [ +  + ]:         18 :     else if (r.getKind() == Kind::REGEXP_RANGE)
     572                 :            :     {
     573                 :          9 :       lenOnePeriod = true;
     574                 :            :     }
     575         [ +  + ]:         95 :     if (!lenOnePeriod)
     576                 :            :     {
     577                 :         48 :       break;
     578                 :            :     }
     579                 :            :     else
     580                 :            :     {
     581                 :         94 :       Node regexp_ch = nm->mkNode(Kind::STRING_IN_REGEXP, substr_ch, r);
     582                 :         47 :       char_constraints.push_back(regexp_ch);
     583                 :         47 :     }
     584                 :            :   }
     585         [ +  + ]:         86 :   if (lenOnePeriod)
     586                 :            :   {
     587 [ -  + ][ -  + ]:         38 :     Assert(!char_constraints.empty());
                 [ -  - ]
     588                 :        114 :     Node bound = nm->mkNode(Kind::AND,
     589                 :         76 :                             {nm->mkNode(Kind::LEQ, zero, index),
     590                 :         76 :                              nm->mkNode(Kind::LT, index, lenx)});
     591                 :         38 :     Node conc = char_constraints.size() == 1
     592                 :         38 :                     ? char_constraints[0]
     593         [ +  - ]:         76 :                     : nm->mkNode(Kind::OR, char_constraints);
     594                 :         76 :     Node body = nm->mkNode(Kind::OR, bound.negate(), conc);
     595                 :         38 :     Node bvl = nm->mkNode(Kind::BOUND_VAR_LIST, index);
     596                 :         76 :     Node res = utils::mkForallInternal(nm, bvl, body);
     597                 :            :     // e.g.
     598                 :            :     //   x in (re.* (re.union "A" "B" )) --->
     599                 :            :     //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
     600                 :         38 :     return returnElim(atom, res, "star-char");
     601                 :         38 :   }
     602                 :            :   // otherwise, for stars of constant length these are periodic
     603         [ +  + ]:         48 :   if (disj.size() == 1)
     604                 :            :   {
     605                 :         39 :     Node r = disj[0];
     606         [ +  - ]:         39 :     if (r.getKind() == Kind::STRING_TO_REGEXP)
     607                 :            :     {
     608                 :         39 :       Node s = r[0];
     609         [ +  + ]:         39 :       if (s.isConst())
     610                 :            :       {
     611                 :         70 :         Node lens = nm->mkConstInt(Word::getLength(s));
     612 [ -  + ][ -  + ]:         35 :         Assert(lens.getConst<Rational>().sgn() > 0);
                 [ -  - ]
     613                 :         35 :         std::vector<Node> conj;
     614                 :            :         // lens is a positive constant, so it is safe to use total div/mod here.
     615                 :        105 :         Node bound = nm->mkNode(
     616                 :            :             Kind::AND,
     617                 :         70 :             {nm->mkNode(Kind::LEQ, zero, index),
     618                 :         70 :              nm->mkNode(Kind::LT,
     619                 :            :                         index,
     620                 :        140 :                         nm->mkNode(Kind::INTS_DIVISION_TOTAL, lenx, lens))});
     621                 :        105 :         Node conc = nm->mkNode(Kind::STRING_SUBSTR,
     622                 :            :                                x,
     623                 :         70 :                                nm->mkNode(Kind::MULT, index, lens),
     624                 :            :                                lens)
     625                 :         35 :                         .eqNode(s);
     626                 :         70 :         Node body = nm->mkNode(Kind::OR, bound.negate(), conc);
     627                 :         35 :         Node bvl = nm->mkNode(Kind::BOUND_VAR_LIST, index);
     628                 :         70 :         Node res = utils::mkForallInternal(nm, bvl, body);
     629                 :        105 :         res = nm->mkNode(
     630                 :            :             Kind::AND,
     631                 :         70 :             nm->mkNode(Kind::INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero),
     632                 :         35 :             res);
     633                 :            :         // e.g.
     634                 :            :         //    x in ("abc")* --->
     635                 :            :         //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
     636                 :            :         //    len(x) mod 3 = 0
     637                 :         35 :         return returnElim(atom, res, "star-constant");
     638                 :         35 :       }
     639         [ +  + ]:         39 :     }
     640         [ +  + ]:         39 :   }
     641                 :         13 :   return Node::null();
     642                 :         86 : }
     643                 :            : 
     644                 :        196 : Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
     645                 :            : {
     646         [ +  - ]:        392 :   Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
     647                 :        196 :                    << "." << std::endl;
     648                 :        196 :   return atomElim;
     649                 :            : }
     650                 :        196 : bool RegExpElimination::isProofEnabled() const
     651                 :            : {
     652                 :        196 :   return d_env.isTheoryProofProducing();
     653                 :            : }
     654                 :            : 
     655                 :            : }  // namespace strings
     656                 :            : }  // namespace theory
     657                 :            : }  // namespace cvc5::internal

Generated by: LCOV version 1.14