LCOV - code coverage report
Current view: top level - buildbot/coverage/build/src/util - string.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 20 21 95.2 %
Date: 2025-03-14 11:52:04 Functions: 13 14 92.9 %
Branches: 1 2 50.0 %

           Branch data     Line data    Source code
       1                 :            : /******************************************************************************
       2                 :            :  * Top contributors (to current version):
       3                 :            :  *   Andrew Reynolds, Tim King, Andres Noetzli
       4                 :            :  *
       5                 :            :  * This file is part of the cvc5 project.
       6                 :            :  *
       7                 :            :  * Copyright (c) 2009-2025 by the authors listed in the file AUTHORS
       8                 :            :  * in the top-level source directory and their institutional affiliations.
       9                 :            :  * All rights reserved.  See the file COPYING in the top-level source
      10                 :            :  * directory for licensing information.
      11                 :            :  * ****************************************************************************
      12                 :            :  *
      13                 :            :  * The string data type.
      14                 :            :  */
      15                 :            : 
      16                 :            : #include "cvc5_public.h"
      17                 :            : 
      18                 :            : #ifndef CVC5__UTIL__STRING_H
      19                 :            : #define CVC5__UTIL__STRING_H
      20                 :            : 
      21                 :            : #include <iosfwd>
      22                 :            : #include <string>
      23                 :            : #include <vector>
      24                 :            : 
      25                 :            : #include "util/rational.h"
      26                 :            : 
      27                 :            : namespace cvc5::internal {
      28                 :            : 
      29                 :            : namespace strings {
      30                 :            : struct StringHashFunction;
      31                 :            : }
      32                 :            : 
      33                 :            : /** The cvc5 string class
      34                 :            :  *
      35                 :            :  * This data structure is the domain of values for the string type. It can also
      36                 :            :  * be used as a generic utility for representing strings.
      37                 :            :  */
      38                 :            : class String
      39                 :            : {
      40                 :            :   friend strings::StringHashFunction;
      41                 :            : 
      42                 :            :  public:
      43                 :            :   /**
      44                 :            :    * This is the cardinality of the alphabet that is representable by this
      45                 :            :    * class. Notice that this must be greater than or equal to the cardinality
      46                 :            :    * of the alphabet that the string theory reasons about.
      47                 :            :    *
      48                 :            :    * This must be strictly less than std::numeric_limits<unsigned>::max().
      49                 :            :    *
      50                 :            :    * As per the SMT-LIB standard for strings, we support the first 3 planes of
      51                 :            :    * Unicode characters, where 196608 = 3*16^4.
      52                 :            :    */
      53                 :    2849030 :   static inline unsigned num_codes() { return 196608; }
      54                 :            :   /** constructors for String
      55                 :            :    *
      56                 :            :    * Internally, a cvc5::internal::String is represented by a vector of unsigned
      57                 :            :    * integers (d_str) representing the code points of the characters.
      58                 :            :    *
      59                 :            :    * To build a string from a C++ string, we may process escape sequences
      60                 :            :    * according to the SMT-LIB standard. In particular, if useEscSequences is
      61                 :            :    * true, we convert unicode escape sequences:
      62                 :            :    *  \u d_3 d_2 d_1 d_0
      63                 :            :    *  \u{d_0}
      64                 :            :    *  \u{d_1 d_0}
      65                 :            :    *  \u{d_2 d_1 d_0}
      66                 :            :    *  \u{d_3 d_2 d_1 d_0}
      67                 :            :    *  \u{d_4 d_3 d_2 d_1 d_0}
      68                 :            :    * where d_0 ... d_4 are hexadecimal digits, to the appropriate character.
      69                 :            :    *
      70                 :            :    * If useEscSequences is false, then the characters of the constructed
      71                 :            :    * cvc5::internal::String correspond one-to-one with the input string.
      72                 :            :    */
      73                 :    1132930 :   String() = default;
      74                 :      38470 :   explicit String(const std::string& s, bool useEscSequences = false)
      75                 :      38470 :       : d_str(toInternal(s, useEscSequences))
      76                 :            :   {
      77                 :      38470 :   }
      78                 :            :   explicit String(const std::wstring& s);
      79                 :      82057 :   explicit String(const char* s, bool useEscSequences = false)
      80                 :      82057 :       : d_str(toInternal(std::string(s), useEscSequences))
      81                 :            :   {
      82                 :      82057 :   }
      83                 :            :   explicit String(const std::vector<unsigned>& s);
      84                 :            : 
      85                 :    1261980 :   String& operator=(const String& y) {
      86         [ +  - ]:    1261980 :     if (this != &y) {
      87                 :    1261980 :       d_str = y.d_str;
      88                 :            :     }
      89                 :    1261980 :     return *this;
      90                 :            :   }
      91                 :            : 
      92                 :            :   String concat(const String& other) const;
      93                 :            : 
      94                 :    5750030 :   bool operator==(const String& y) const { return cmp(y) == 0; }
      95                 :        147 :   bool operator!=(const String& y) const { return cmp(y) != 0; }
      96                 :         48 :   bool operator<(const String& y) const { return cmp(y) < 0; }
      97                 :            :   bool operator>(const String& y) const { return cmp(y) > 0; }
      98                 :          0 :   bool operator<=(const String& y) const { return cmp(y) <= 0; }
      99                 :            :   bool operator>=(const String& y) const { return cmp(y) >= 0; }
     100                 :            : 
     101                 :            :   /**
     102                 :            :    * Returns true if this string is equal to y for their first n characters.
     103                 :            :    * If n is larger than the length of this string or y, this method returns
     104                 :            :    * true if and only if this string is equal to y.
     105                 :            :    */
     106                 :            :   bool strncmp(const String& y, std::size_t n) const;
     107                 :            :   /**
     108                 :            :    * Returns true if this string is equal to y for their last n characters.
     109                 :            :    * Similar to strncmp, if n is larger than the length of this string or y,
     110                 :            :    * this method returns true if and only if this string is equal to y.
     111                 :            :    */
     112                 :            :   bool rstrncmp(const String& y, std::size_t n) const;
     113                 :            : 
     114                 :            :   /* toString
     115                 :            :    * Converts this string to a std::string.
     116                 :            :    *
     117                 :            :    * The unprintable characters are converted to unicode escape sequences as
     118                 :            :    * described above.
     119                 :            :    *
     120                 :            :    * If useEscSequences is false, the string's printable characters are
     121                 :            :    * printed as characters. Notice that for all std::string s having only
     122                 :            :    * printable characters, we have that
     123                 :            :    *    cvc5::internal::String( s ).toString() = s.
     124                 :            :    */
     125                 :            :   std::string toString(bool useEscSequences = false) const;
     126                 :            :   /* toWString
     127                 :            :    * Converts this string to a std::wstring.
     128                 :            :    *
     129                 :            :    * Unlike toString(), this method uses no escape sequences as both this class
     130                 :            :    * and std::wstring use 32bit characters.
     131                 :            :    */
     132                 :            :   std::wstring toWString() const;
     133                 :            :   /** is this the empty string? */
     134                 :     210756 :   bool empty() const { return d_str.empty(); }
     135                 :            :   /** is less than or equal to string y */
     136                 :            :   bool isLeq(const String& y) const;
     137                 :            :   /** Return the length of the string */
     138                 :   25225300 :   std::size_t size() const { return d_str.size(); }
     139                 :            : 
     140                 :            :   bool isRepeated() const;
     141                 :            :   bool tailcmp(const String& y, int& c) const;
     142                 :            : 
     143                 :            :   /**
     144                 :            :    * Return the first position y occurs in this string, or std::string::npos
     145                 :            :    * otherwise.
     146                 :            :    */
     147                 :            :   std::size_t find(const String& y, const std::size_t start = 0) const;
     148                 :            :   /**
     149                 :            :    * Return the first position y occurs in this string searching from the end,
     150                 :            :    * or std::string::npos otherwise.
     151                 :            :    */
     152                 :            :   std::size_t rfind(const String& y, const std::size_t start = 0) const;
     153                 :            :   /** Returns true if y is a prefix of this */
     154                 :            :   bool hasPrefix(const String& y) const;
     155                 :            :   /** Returns true if y is a suffix of this */
     156                 :            :   bool hasSuffix(const String& y) const;
     157                 :            :   /** Replace the character at index i in this string with t */
     158                 :            :   String update(std::size_t i, const String& t) const;
     159                 :            :   /** Replace the first occurrence of s in this string with t */
     160                 :            :   String replace(const String& s, const String& t) const;
     161                 :            :   /** Return the substring of this string starting at index i */
     162                 :            :   String substr(std::size_t i) const;
     163                 :            :   /** Return the substring of this string starting at index i with size at most
     164                 :            :    * j */
     165                 :            :   String substr(std::size_t i, std::size_t j) const;
     166                 :            :   /** Return the prefix of this string of size at most i */
     167                 :      16008 :   String prefix(std::size_t i) const { return substr(0, i); }
     168                 :            :   /** Return the suffix of this string of size at most i */
     169                 :      44968 :   String suffix(std::size_t i) const { return substr(size() - i, i); }
     170                 :            : 
     171                 :            :   /** string overlap
     172                 :            :   *
     173                 :            :   * if overlap returns m>0,
     174                 :            :   * then the maximal suffix of this string that is a prefix of y is of length m.
     175                 :            :   *
     176                 :            :   * For example, if x is "abcdef", then:
     177                 :            :   * x.overlap("defg") = 3
     178                 :            :   * x.overlap("ab") = 0
     179                 :            :   * x.overlap("d") = 0
     180                 :            :   * x.overlap("bcdefdef") = 5
     181                 :            :   */
     182                 :            :   std::size_t overlap(const String& y) const;
     183                 :            :   /** string reverse overlap
     184                 :            :   *
     185                 :            :   * if roverlap returns m>0,
     186                 :            :   * then the maximal prefix of this string that is a suffix of y is of length m.
     187                 :            :   *
     188                 :            :   * For example, if x is "abcdef", then:
     189                 :            :   * x.roverlap("aaabc") = 3
     190                 :            :   * x.roverlap("def") = 0
     191                 :            :   * x.roverlap("d") = 0
     192                 :            :   * x.roverlap("defabcde") = 5
     193                 :            :   *
     194                 :            :   * Notice that x.overlap(y) = y.roverlap(x)
     195                 :            :   */
     196                 :            :   std::size_t roverlap(const String& y) const;
     197                 :            : 
     198                 :            :   /**
     199                 :            :    * Returns true if this string corresponds in text to a number, for example
     200                 :            :    * this returns true for strings "7", "12", "004", "0" and false for strings
     201                 :            :    * "abc", "4a", "-4", "".
     202                 :            :    */
     203                 :            :   bool isNumber() const;
     204                 :            :   /** Returns the corresponding rational for the text of this string. */
     205                 :            :   Rational toNumber() const;
     206                 :            :   /** Get the unsigned representation (code points) of this string */
     207                 :     342045 :   const std::vector<unsigned>& getVec() const { return d_str; }
     208                 :            :   /**
     209                 :            :    * Get the unsigned (code point) value of the first character in this string
     210                 :            :    */
     211                 :            :   unsigned front() const;
     212                 :            :   /**
     213                 :            :    * Get the unsigned (code point) value of the last character in this string
     214                 :            :    */
     215                 :            :   unsigned back() const;
     216                 :            :   /** is the unsigned a digit?
     217                 :            :    *
     218                 :            :    * This is true for code points between 48 ('0') and 57 ('9').
     219                 :            :    */
     220                 :            :   static bool isDigit(unsigned character);
     221                 :            :   /** is the unsigned a hexadecimal digit?
     222                 :            :    *
     223                 :            :    * This is true for code points between 48 ('0') and 57 ('9'), code points
     224                 :            :    * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
     225                 :            :    */
     226                 :            :   static bool isHexDigit(unsigned character);
     227                 :            :   /** is the unsigned a printable code point?
     228                 :            :    *
     229                 :            :    * This is true for Unicode 32 (' ') to 126 ('~').
     230                 :            :    */
     231                 :            :   static bool isPrintable(unsigned character);
     232                 :            : 
     233                 :            :   /**
     234                 :            :    * Returns the maximum length of string representable by this class.
     235                 :            :    * Corresponds to the maximum size of d_str.
     236                 :            :    */
     237                 :            :   static size_t maxSize();
     238                 :            :  private:
     239                 :            :   /**
     240                 :            :    * Helper for toInternal: add character ch to vector vec, storing a string in
     241                 :            :    * internal format. This throws an error if ch is not a printable character,
     242                 :            :    * since non-printable characters must be escaped in SMT-LIB.
     243                 :            :    */
     244                 :            :   static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
     245                 :            :   /**
     246                 :            :    * Convert the string s to the internal format (vector of code points).
     247                 :            :    * The argument useEscSequences is whether to process unicode escape
     248                 :            :    * sequences.
     249                 :            :    */
     250                 :            :   static std::vector<unsigned> toInternal(const std::string& s,
     251                 :            :                                           bool useEscSequences);
     252                 :            : 
     253                 :            :   /**
     254                 :            :    * Returns a negative number if *this < y, 0 if *this and y are equal and a
     255                 :            :    * positive number if *this > y.
     256                 :            :    */
     257                 :            :   int cmp(const String& y) const;
     258                 :            : 
     259                 :            :   std::vector<unsigned> d_str;
     260                 :            : }; /* class String */
     261                 :            : 
     262                 :            : namespace strings {
     263                 :            : 
     264                 :            : struct StringHashFunction
     265                 :            : {
     266                 :            :   size_t operator()(const cvc5::internal::String& s) const;
     267                 :            : }; /* struct StringHashFunction */
     268                 :            : 
     269                 :            : }  // namespace strings
     270                 :            : 
     271                 :            : std::ostream& operator<<(std::ostream& os, const String& s);
     272                 :            : 
     273                 :            : }  // namespace cvc5::internal
     274                 :            : 
     275                 :            : #endif /* CVC5__UTIL__STRING_H */

Generated by: LCOV version 1.14