LCOV - stap.info - /mnt/wasteland/wcohen/systemtap_write/systemtap/re2c-migrate/stapregex.cxx

LCOV - code coverage report

Current view:	top level - mnt/wasteland/wcohen/systemtap_write/systemtap/re2c-migrate - stapregex.cxx (source / functions)		Hit	Total	Coverage
Test:	stap.info	Lines:	183	256	71.5 %
Date:	2013-03-08	Functions:	21	27	77.8 %
		Branches:	218	509	42.8 %

           Branch data     Line data    Source code

       1                 :            : // -*- C++ -*-
       2                 :            : // Copyright (C) 2012-2013 Red Hat Inc.
       3                 :            : //
       4                 :            : // This file is part of systemtap, and is free software.  You can
       5                 :            : // redistribute it and/or modify it under the terms of the GNU General
       6                 :            : // Public License (GPL); either version 2, or (at your option) any
       7                 :            : // later version.
       8                 :            : //
       9                 :            : // ---
      10                 :            : //
      11                 :            : // This file incorporates code from the re2c project; please see
      12                 :            : // re2c-migrate/README for details.
      13                 :            : 
      14                 :            : #include "stapregex.h"
      15                 :            : #include "../translate.h"
      16                 :            : #include "../session.h"
      17                 :            : #include "../util.h"
      18                 :            : 
      19                 :            : #include <iostream>
      20                 :            : #include <cstdlib>
      21                 :            : #include <string>
      22                 :            : 
      23                 :            : using namespace std;
      24                 :            : 
      25                 :            : // ---------------------------------------------------------------------
      26                 :            : 
      27                 :            : // TODOXXX support for standalone regcomp without ugly duplicate code
      28                 :            : #ifdef REGCOMP_STANDALONE
      29                 :            : 
      30                 :            : extern "C" {
      31                 :            : #include <stdio.h>
      32                 :            : #include <stdlib.h>
      33                 :            : #include <unistd.h>
      34                 :            : #include <stdarg.h>
      35                 :            : }
      36                 :            : 
      37                 :            : /* just the bare minimum required for regtest */
      38                 :            : 
      39                 :            : std::string autosprintf(const char* format, ...)
      40                 :            : {
      41                 :            :   va_list args;
      42                 :            :   char *str;
      43                 :            :   va_start (args, format);
      44                 :            :   int rc = vasprintf (&str, format, args);
      45                 :            :   if (rc < 0)
      46                 :            :     {
      47                 :            :       va_end(args);
      48                 :            :       return _F("autosprintf/vasprintf error %d", rc);
      49                 :            :     }
      50                 :            :   string s = str;
      51                 :            :   va_end (args);
      52                 :            :   free (str);
      53                 :            :   return s; /* by copy */
      54                 :            : }
      55                 :            : 
      56                 :            : translator_output::translator_output (ostream& f):
      57                 :            :   buf(0), o2 (0), o (f), tablevel (0)
      58                 :            : {
      59                 :            : }
      60                 :            : 
      61                 :            : 
      62                 :            : translator_output::translator_output (const string& filename, size_t bufsize):
      63                 :            :   buf (new char[bufsize]),
      64                 :            :   o2 (new ofstream (filename.c_str ())),
      65                 :            :   o (*o2),
      66                 :            :   tablevel (0),
      67                 :            :   filename (filename)
      68                 :            : {
      69                 :            :   o2->rdbuf()->pubsetbuf(buf, bufsize);
      70                 :            : }
      71                 :            : 
      72                 :            : 
      73                 :            : translator_output::~translator_output ()
      74                 :            : {
      75                 :            :   delete o2;
      76                 :            :   delete [] buf;
      77                 :            : }
      78                 :            : 
      79                 :            : 
      80                 :            : ostream&
      81                 :            : translator_output::newline (int indent)
      82                 :            : {
      83                 :            :   if (!  (indent > 0 || tablevel >= (unsigned)-indent)) o.flush ();
      84                 :            :   assert (indent > 0 || tablevel >= (unsigned)-indent);
      85                 :            : 
      86                 :            :   tablevel += indent;
      87                 :            :   o << "\n";
      88                 :            :   for (unsigned i=0; i<tablevel; i++)
      89                 :            :     o << "  ";
      90                 :            :   return o;
      91                 :            : }
      92                 :            : 
      93                 :            : 
      94                 :            : void
      95                 :            : translator_output::indent (int indent)
      96                 :            : {
      97                 :            :   if (!  (indent > 0 || tablevel >= (unsigned)-indent)) o.flush ();
      98                 :            :   assert (indent > 0 || tablevel >= (unsigned)-indent);
      99                 :            :   tablevel += indent;
     100                 :            : }
     101                 :            : 
     102                 :            : 
     103                 :            : ostream&
     104                 :            : translator_output::line ()
     105                 :            : {
     106                 :            :   return o;
     107                 :            : }
     108                 :            : 
     109                 :            : #endif
     110                 :            : 
     111                 :            : // ---------------------------------------------------------------------
     112                 :            : 
     113                 :            : // Systemtap string literals are stored without substituting any
     114                 :            : // escape codes, so we need to do that here:
     115                 :            : 
     116                 :            : string
     117                 :         24 : escape_string_literal(const string &old_input)
     118                 :            : {
     119         [ +  - ]:         24 :   string input("");
     120 [ +  - ][ +  + ]:        304 :   for (unsigned i = 0; i < old_input.size(); i++)
     121                 :            :     {
     122         [ +  - ]:        280 :       char c = old_input[i];
     123                 :            : 
     124         [ +  + ]:        280 :       if (c == '\\')
     125                 :            :         {
     126                 :         15 :           i++;
     127 [ +  - ][ -  + ]:         15 :           assert (i < old_input.size()); // XXX should be caught by stap parser
     128         [ +  - ]:         15 :           c = old_input[i];
     129                 :            : 
     130   [ -  -  -  -  :         15 :           switch (c)
             -  -  -  + ]
     131                 :            :             {
     132                 :            :             case 'a':
     133                 :          0 :               c = '\a'; break;
     134                 :            :             case 'b':
     135                 :          0 :               c = '\b'; break;
     136                 :            :             case 't':
     137                 :          0 :               c = '\t'; break;
     138                 :            :             case 'n':
     139                 :          0 :               c = '\n'; break;
     140                 :            :             case 'v':
     141                 :          0 :               c = '\v'; break;
     142                 :            :             case 'f':
     143                 :          0 :               c = '\f'; break;
     144                 :            :             case 'r':
     145                 :          0 :               c = '\r'; break;
     146                 :            : 
     147                 :            :             // none of the following should be converted
     148                 :            :             case '0' ... '7': // NB: need only match the first digit
     149                 :            :               // TODOXXX octals should be handled by re2c??
     150                 :            :             case '\\':
     151                 :            :             default:
     152                 :         15 :                   break;
     153                 :            :             }
     154                 :            :         }
     155                 :            : 
     156         [ +  - ]:        280 :       input.push_back(c);
     157                 :            :     }
     158                 :         24 :   return input;
     159                 :            : }
     160                 :            : 
     161                 :            : // ---------------------------------------------------------------------
     162                 :            : 
     163                 :            : #include "re2c-globals.h"
     164                 :            : #include "re2c-dfa.h"
     165                 :            : #include "re2c-regex.h"
     166                 :            : 
     167                 :            : using namespace re2c;
     168                 :            : 
     169                 :         72 : class regex_parser {
     170                 :            : public:
     171                 :         72 :   regex_parser (const string& input) : input(input) {}
     172                 :            :   RegExp *parse ();
     173                 :            : 
     174                 :            : private:
     175                 :            :   string input;
     176                 :            : 
     177                 :            :   // scan state
     178                 :            :   char next ();
     179                 :            :   char peek ();
     180                 :            :   bool finished ();
     181                 :            : 
     182                 :            :   unsigned next_pos; // pos of next char to be returned
     183                 :            :   unsigned last_pos; // pos of last returned char
     184                 :            :   char next_c;
     185                 :            :   char last_c;
     186                 :            : 
     187                 :            :   // TODOXXX throughout: re2c errors should become parse errors
     188                 :            :   void parse_error (const string& msg, unsigned pos);
     189                 :            :   void parse_error (const string& msg); // report error at last_pos
     190                 :            : 
     191                 :            :   // character classes
     192                 :            :   bool isspecial (char c); // any of .[{()\*+?|^$
     193                 :            : 
     194                 :            :   // expectations
     195                 :            :   void expect (char expected);
     196                 :            : 
     197                 :            : private: // re2c hackery
     198                 :            :   Scanner *sc;
     199                 :            : 
     200                 :            : private: // nonterminals
     201                 :            :   RegExp *parse_expr ();
     202                 :            :   RegExp *parse_term ();
     203                 :            :   RegExp *parse_factor ();
     204                 :            :   RegExp *parse_char_range ();
     205                 :            :   unsigned parse_number ();
     206                 :            : };
     207                 :            : 
     208                 :            : 
     209                 :            : // ------------------------------------------------------------------------
     210                 :            : 
     211                 :            : stapdfa * 
     212                 :         45 : regex_to_stapdfa (systemtap_session *s, const string& input, unsigned& counter)
     213                 :            : {
     214 [ +  - ][ +  + ]:         45 :   if (s->dfas.find(input) != s->dfas.end())
     215                 :         21 :     return s->dfas[input];
     216                 :            : 
     217 [ +  - ][ +  - ]:         45 :   return s->dfas[input] = new stapdfa ("__stp_dfa" + lex_cast(counter++), input);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
                 [ +  - ]
     218                 :            : }
     219                 :            : 
     220                 :            : // ------------------------------------------------------------------------
     221                 :            : 
     222                 :            : RegExp *stapdfa::failRE = NULL;
     223                 :            : RegExp *stapdfa::padRE = NULL;
     224                 :            : 
     225                 :         24 : stapdfa::stapdfa (const string& func_name, const string& re, bool escape)
     226         [ +  - ]:         24 :   : orig_input(re), func_name(func_name)
     227                 :            : {
     228                 :            :   try
     229                 :            :     {
     230         [ +  - ]:         24 :       if (!failRE) {
     231                 :            :         //regex_parser p("[\\000-\\377]");
     232 [ +  - ][ +  - ]:         24 :         regex_parser p("");
                 [ +  - ]
     233 [ +  - ][ +  - ]:         24 :         failRE = p.parse();
     234                 :            :       }
     235         [ +  - ]:         24 :       if (!padRE) {
     236 [ +  - ][ +  - ]:         24 :         regex_parser p(".*");
                 [ +  - ]
     237 [ +  - ][ +  - ]:         24 :         padRE = p.parse();
     238                 :            :       }
     239                 :            : 
     240 [ +  - ][ +  - ]:         24 :       regex_parser p(escape ? escape_string_literal(re) : re);
         [ #  # ][ +  - ]
                 [ +  - ]
     241 [ +  - ][ +  - ]:         24 :       ast = prepare_rule(p.parse ()); // must be retained for re2c's reference
     242                 :            : 
     243                 :            :       // compile ast to DFA
     244         [ +  - ]:         24 :       content = genCode (ast);
     245 [ +  - ][ +  - ]:         24 :       content->prepare();
     246                 :            :     }
     247                 :            :   catch (const re2c_error &e)
     248                 :            :     {
     249                 :            : #ifdef REGCOMP_STANDALONE
     250                 :            :       cerr << e.what() << " (at " << e.pos << ")" << endl;
     251                 :            :       exit (1);
     252                 :            : #else
     253   [ #  #  #  # ]:            :       throw semantic_error (e.what());
     254                 :            : #endif
     255                 :            :     }
     256      [ #  #  # ]:            :   catch (const dfa_parse_error &e)
     257                 :            :     {
     258                 :            : #ifdef REGCOMP_STANDALONE
     259                 :            :       cerr << e.what() << " (at " << e.pos << ")" << endl;
     260                 :            :       exit (1);
     261                 :            : #else
     262   [ #  #  #  #  :            :       throw semantic_error (string("regex parse error: ") + e.what());
                   #  # ]
     263                 :            : #endif
     264                 :            :     }
     265                 :         24 : }
     266                 :            : 
     267                 :          0 : stapdfa::~stapdfa ()
     268                 :            : {
     269 [ #  # ][ #  # ]:          0 :   delete content;
     270 [ #  # ][ #  # ]:          0 :   delete ast;
     271         [ #  # ]:          0 : }
     272                 :            : 
     273                 :            : void
     274                 :         23 : stapdfa::emit_declaration (translator_output *o)
     275                 :            : {
     276                 :         23 :   o->newline() << "static int";
     277                 :         23 :   o->newline() << func_name << " (const char *cur)";
     278                 :         23 :   o->newline() << "{";
     279                 :         23 :   o->indent(1);
     280                 :            : 
     281                 :         23 :   o->newline() << "const char *start = cur;";
     282                 :         23 :   o->newline() << "unsigned l = strlen(cur) + 1;"; /* include \0 byte at end of string */
     283                 :         23 :   o->newline() << "const char *mar;";
     284                 :         23 :   o->newline() << "#define YYCTYPE char";
     285                 :         23 :   o->newline() << "#define YYCURSOR cur";
     286                 :         23 :   o->newline() << "#define YYLIMIT cur";
     287                 :         23 :   o->newline() << "#define YYMARKER mar";
     288                 :         23 :   o->newline() << "#define YYFILL(n) ({ if ((cur - start) + n > l) return 0; })";
     289                 :            : 
     290                 :            :   try
     291                 :            :     {
     292                 :         23 :       unsigned topIndent = 0;
     293                 :         23 :       bool bPrologBrace = false;
     294 [ +  - ][ +  - ]:         23 :       content->emit(o->newline(), topIndent, NULL, "", 0, bPrologBrace);
         [ +  - ][ +  - ]
     295                 :            :     }
     296         [ #  # ]:            :   catch (const re2c_error &e)
     297                 :            :     {
     298                 :            : #ifdef REGCOMP_STANDALONE
     299                 :            :       cerr << e.what() << " (at " << e.pos << ")" << endl;
     300                 :            :       exit (1);
     301                 :            : #else
     302   [ #  #  #  # ]:            :       throw semantic_error (e.what());
     303                 :            : #endif
     304                 :            :     }
     305                 :            : 
     306                 :         23 :   o->newline() << "#undef YYCTYPE";
     307                 :         23 :   o->newline() << "#undef YYCURSOR";
     308                 :         23 :   o->newline() << "#undef YYLIMIT";
     309                 :         23 :   o->newline() << "#undef YYMARKER";
     310                 :         23 :   o->newline() << "#undef YYFILL";
     311                 :            : 
     312                 :         23 :   o->newline(-1) << "}";
     313                 :         23 : }
     314                 :            : 
     315                 :            : void
     316                 :         44 : stapdfa::emit_matchop_start (translator_output *o)
     317                 :            : {
     318                 :            :   // TODOXXX eventually imitate visit_functioncall in translate.cxx??
     319                 :         44 :   o->line() << "(" << func_name << " (";
     320                 :         44 : }
     321                 :            : 
     322                 :            : void
     323                 :         44 : stapdfa::emit_matchop_end (translator_output *o)
     324                 :            : {
     325                 :            :   // TODOXXX eventually imitate visit_functioncall in translate.cxx??
     326                 :         44 :   o->line() << ")" << ")";
     327                 :         44 : }
     328                 :            : 
     329                 :            : RegExp *
     330                 :         24 : stapdfa::prepare_rule (RegExp *expr)
     331                 :            : {
     332                 :            :   // Enable regex match to start at any point in the string:
     333 [ +  - ][ +  - ]:         24 :   if (!expr->anchored) expr = new CatOp (padRE, expr);
                 [ +  - ]
     334                 :            : 
     335                 :            : #define CODE_YES "{ return 1; }"
     336                 :            : #define CODE_NO "{ return 0; }"
     337                 :         24 :   SubStr codeYes(CODE_YES);
     338 [ +  - ][ +  - ]:         24 :   Token *tokenYes = new Token(codeYes, CODE_YES, 0);
         [ +  - ][ +  - ]
     339                 :         24 :   SubStr codeNo(CODE_NO);
     340 [ +  - ][ +  - ]:         24 :   Token *tokenNo = new Token(codeNo, CODE_NO, 0);
         [ +  - ][ +  - ]
     341                 :            : 
     342 [ +  - ][ +  - ]:         24 :   RegExp *nope = new NullOp;
     343                 :            : 
     344                 :            :   // To ensure separate outcomes for each alternative (match or fail),
     345                 :            :   // simply give different accept parameters to the RuleOp
     346                 :            :   // constructor:
     347 [ +  - ][ +  - ]:         24 :   RegExp *resMatch = new RuleOp(expr, nope, tokenYes, 0);
     348 [ +  - ][ +  - ]:         24 :   RegExp *resFail = new RuleOp(failRE, nope, tokenNo, 1);
     349                 :            : 
     350         [ +  - ]:         24 :   return mkAlt(resMatch, resFail);
     351                 :            : }
     352                 :            : 
     353                 :            : void
     354                 :          0 : stapdfa::print (std::ostream& o) const
     355                 :            : {
     356                 :            :   // TODOXXX escape special chars in orig_input
     357                 :          0 :   o << "dfa(" << func_name << ",\"" << orig_input << "\")" << endl;
     358                 :          0 :   o << content << endl;
     359                 :            :   // TODOXXX properly indent and delineate content
     360                 :          0 : }
     361                 :            : 
     362                 :            : std::ostream&
     363                 :          0 : operator << (std::ostream &o, const stapdfa& d)
     364                 :            : {
     365                 :          0 :   d.print (o);
     366                 :          0 :   return o;
     367                 :            : }
     368                 :            : 
     369                 :            : // ------------------------------------------------------------------------
     370                 :            : 
     371                 :            : RegExp *
     372                 :         72 : regex_parser::parse ()
     373                 :            : {
     374         [ +  - ]:         72 :   sc = new Scanner(cin, cout); // cin/cout are actually ignored here...
     375                 :         72 :   next_c = 0; last_c = 0;
     376                 :         72 :   next_pos = 0; last_pos = 0;
     377                 :            : 
     378                 :         72 :   RegExp *result = parse_expr ();
     379                 :            : 
     380         [ -  + ]:         72 :   if (! finished ())
     381                 :            :     {
     382                 :          0 :       char c = peek ();
     383         [ #  # ]:          0 :       if (c == ')')
     384 [ #  # ][ #  # ]:          0 :         parse_error ("unbalanced ')'", next_pos);
                 [ #  # ]
     385                 :            :       else
     386                 :            :         // This should not be possible:
     387 [ #  # ][ #  # ]:          0 :         parse_error ("BUG -- regex parse failed to finish for unknown reasons", next_pos);
                 [ #  # ]
     388                 :            :     }
     389                 :            : 
     390         [ +  - ]:         72 :   delete sc;
     391                 :         72 :   return result;
     392                 :            : }
     393                 :            : 
     394                 :            : char
     395                 :        328 : regex_parser::next ()
     396                 :            : {
     397 [ -  + ][ #  # ]:        328 :   if (! next_c && finished ())
                 [ -  + ]
     398 [ #  # ][ #  # ]:          0 :     parse_error(_("unexpected end of regex"), next_pos);
                 [ #  # ]
     399         [ -  + ]:        328 :   if (! next_c)
     400                 :            :     {
     401                 :          0 :       last_pos = next_pos;
     402                 :          0 :       next_c = input[next_pos];
     403                 :          0 :       next_pos++;
     404                 :            :     }
     405                 :            : 
     406                 :        328 :   last_c = next_c;
     407                 :            :   // advance by zeroing next_c
     408                 :        328 :   next_c = 0;
     409                 :        328 :   return last_c;
     410                 :            : }
     411                 :            : 
     412                 :            : char
     413                 :        775 : regex_parser::peek ()
     414                 :            : {
     415 [ +  + ][ +  + ]:        775 :   if (! next_c && ! finished ())
                 [ +  + ]
     416                 :            :     {
     417                 :        328 :       last_pos = next_pos;
     418                 :        328 :       next_c = input[next_pos];
     419                 :        328 :       next_pos++;
     420                 :            :     }
     421                 :            : 
     422                 :            :   // don't advance by zeroing next_c
     423                 :        775 :   last_c = next_c;
     424                 :        775 :   return next_c;
     425                 :            : }
     426                 :            : 
     427                 :            : bool
     428                 :        629 : regex_parser::finished ()
     429                 :            : {
     430                 :        629 :   return ( next_pos >= input.size() );
     431                 :            : }
     432                 :            : 
     433                 :            : bool
     434                 :        297 : regex_parser::isspecial (char c)
     435                 :            : {
     436                 :            :   return ( c == '.' || c == '[' || c == '{' || c == '(' || c == ')'
     437                 :            :            || c == '\\' || c == '*' || c == '+' || c == '?' || c == '|'
     438 [ +  + ][ +  + ]:        297 :            || c == '^' || c == '$' );
         [ +  - ][ +  + ]
         [ +  + ][ +  + ]
         [ +  - ][ +  - ]
         [ +  - ][ +  + ]
         [ +  + ][ +  + ]
     439                 :            : }
     440                 :            : 
     441                 :            : void
     442                 :         32 : regex_parser::expect (char expected)
     443                 :            : {
     444                 :         32 :   char c = 0;
     445                 :            :   try {
     446         [ +  - ]:         32 :     c = next ();
     447         [ #  # ]:            :   } catch (const dfa_parse_error &e) {
     448   [ #  #  #  #  :            :     parse_error (_F("expected %c, found end of regex", expected));
                   #  # ]
     449                 :            :   }
     450                 :            : 
     451         [ -  + ]:         32 :   if (c != expected)
     452         [ #  # ]:          0 :     parse_error (_F("expected %c, found %c", expected, c));
     453                 :         32 : }
     454                 :            : 
     455                 :            : void
     456                 :          0 : regex_parser::parse_error (const string& msg, unsigned pos)
     457                 :            : {
     458         [ #  # ]:          0 :   throw dfa_parse_error(msg, input, pos);
     459                 :            : }
     460                 :            : 
     461                 :            : void
     462                 :          0 : regex_parser::parse_error (const string& msg)
     463                 :            : {
     464                 :          0 :   parse_error (msg, last_pos);
     465                 :          0 : }
     466                 :            : 
     467                 :            : // ------------------------------------------------------------------------
     468                 :            : 
     469                 :            : RegExp *
     470                 :         97 : regex_parser::parse_expr ()
     471                 :            : {
     472                 :         97 :   RegExp *result = parse_term ();
     473                 :            : 
     474                 :         97 :   char c = peek ();
     475 [ +  + ][ +  + ]:        118 :   while (c && c == '|')
                 [ +  + ]
     476                 :            :     {
     477                 :         21 :       next ();
     478                 :         21 :       RegExp *alt = parse_term ();
     479                 :         21 :       result = mkAlt (result, alt); // TODOXXX right-association o.k.?
     480                 :         21 :       c = peek ();
     481                 :            :     }
     482                 :            : 
     483                 :         97 :   return result;
     484                 :            : }
     485                 :            : 
     486                 :            : RegExp *
     487                 :        118 : regex_parser::parse_term ()
     488                 :            : {
     489                 :        118 :   RegExp *result = parse_factor ();
     490                 :            : 
     491                 :        118 :   char c = peek ();
     492 [ +  + ][ +  + ]:        155 :   while (c && c != '|' && c != ')')
         [ +  + ][ +  + ]
     493                 :            :     {
     494                 :         37 :       RegExp *next = parse_factor ();
     495         [ +  - ]:         37 :       result = new CatOp(result, next); // TODOXXX right-association o.k.?
     496                 :         37 :       c = peek ();
     497                 :            :     }
     498                 :            : 
     499                 :        118 :   return result;
     500                 :            : }
     501                 :            : 
     502                 :            : RegExp *
     503                 :        155 : regex_parser::parse_factor ()
     504                 :            : {
     505                 :            :   RegExp *result;
     506                 :        155 :   RegExp *old_result = NULL;
     507                 :            : 
     508                 :        155 :   char c = peek ();
     509 [ +  + ][ +  - ]:        155 :   if (! c || c == '|' || c == ')')
                 [ -  + ]
     510                 :            :     {
     511         [ +  - ]:         24 :       result = new NullOp();
     512                 :         24 :       return result;
     513                 :            :     }
     514 [ +  - ][ +  - ]:        131 :   else if (c == '*' || c == '+' || c == '?' || c == '{')
         [ +  - ][ -  + ]
     515                 :            :     {
     516         [ #  # ]:          0 :       parse_error(_F("unexpected '%c'", c));
     517                 :            :     }
     518                 :            : 
     519 [ +  + ][ +  + ]:        131 :   if (isspecial (c) && c != '\\')
                 [ +  + ]
     520                 :         66 :     next (); // c is guaranteed to be swallowed
     521                 :            : 
     522         [ +  + ]:        131 :   if (c == '.')
     523                 :            :     {
     524                 :         27 :       result = sc->mkDot ();
     525                 :            :     }
     526         [ +  + ]:        104 :   else if (c == '[')
     527                 :            :     {
     528                 :          7 :       result = parse_char_range ();
     529                 :          7 :       expect (']');
     530                 :            :     }
     531         [ +  + ]:         97 :   else if (c == '(')
     532                 :            :     {
     533                 :         25 :       result = parse_expr ();
     534                 :         25 :       expect (')');
     535                 :            :     }
     536 [ +  + ][ +  + ]:         72 :   else if (c == '^' || c == '$')
     537                 :            :     {
     538         [ +  - ]:          7 :       result = new AnchorOp(c);
     539                 :            :     }
     540                 :            :   else // escaped or ordinary character -- not yet swallowed
     541                 :            :     {
     542         [ +  - ]:         65 :       string accumulate;
     543                 :         65 :       char d = 0;
     544                 :            : 
     545 [ +  + ][ +  + ]:        179 :       while (c && ( ! isspecial (c) || c == '\\' ))
         [ +  + ][ +  + ]
     546                 :            :         {
     547         [ +  + ]:        126 :           if (c == '\\')
     548                 :            :             {
     549         [ +  - ]:         14 :               next ();
     550         [ +  - ]:         14 :               c = peek ();
     551                 :            :             }
     552                 :            : 
     553         [ +  - ]:        126 :           next ();
     554         [ +  - ]:        126 :           d = peek ();
     555                 :            : 
     556                 :            :           /* if we end in a closure, it only grabs the last character */
     557 [ +  + ][ +  + ]:        126 :           if (d == '*' || d == '+' || d == '?' || d == '{')
         [ +  + ][ -  + ]
     558                 :            :             {
     559                 :            :               /* save the last character */
     560                 :         12 :               d = c; break;
     561                 :            :             }
     562                 :            : 
     563         [ +  - ]:        114 :           accumulate.push_back (c);
     564                 :        114 :           c = d; d = 0;
     565                 :            :         }
     566                 :            : 
     567                 :            :       // strToRE takes this funky custom class
     568         [ +  - ]:         65 :       SubStr accumSubStr (accumulate.c_str ());
     569         [ +  - ]:         65 :       result = sc->strToRE (accumSubStr);
     570                 :            : 
     571         [ +  + ]:         65 :       if (d != 0) {
     572 [ +  - ][ +  - ]:         12 :         string dd(""); dd.push_back(d);
     573         [ +  - ]:         12 :         SubStr accumSubStr2 (dd.c_str());
     574                 :         12 :         old_result = result;
     575 [ +  - ][ +  - ]:         12 :         result = sc->strToRE (accumSubStr2);
     576         [ +  - ]:         65 :       }
     577                 :            :     }
     578                 :            : 
     579                 :            :   /* parse closures or other postfix operators */
     580                 :        131 :   c = peek ();
     581 [ +  + ][ +  + ]:        172 :   while (c == '*' || c == '+' || c == '?' || c == '{')
         [ +  + ][ -  + ]
                 [ +  + ]
     582                 :            :     {
     583                 :         41 :       next ();
     584                 :            : 
     585                 :            :       /* closure-type operators applied to $^ are definitely not kosher */
     586 [ +  - ][ +  - ]:         41 :       if (string(result->typeOf()) == string("AnchorOp"))
         [ +  - ][ +  - ]
         [ +  - ][ -  + ]
     587                 :            :         {
     588         [ #  # ]:          0 :           parse_error(_F("postfix closure '%c' applied to anchoring operator", c));
     589                 :            :         }
     590                 :            : 
     591         [ +  + ]:         41 :       if (c == '*')
     592                 :            :         {
     593 [ +  - ][ +  - ]:         33 :           result = mkAlt (new CloseOp(result), new NullOp());
     594                 :            :         }
     595         [ +  + ]:          8 :       else if (c == '+')
     596                 :            :         {
     597         [ +  - ]:          5 :           result = new CloseOp(result);
     598                 :            :         }
     599         [ +  - ]:          3 :       else if (c == '?')
     600                 :            :         {
     601         [ +  - ]:          3 :           result = mkAlt (result, new NullOp());
     602                 :            :         }
     603         [ #  # ]:          0 :       else if (c == '{')
     604                 :            :         {
     605                 :          0 :           int minsize = parse_number ();
     606                 :          0 :           int maxsize = -1;
     607                 :            : 
     608                 :          0 :           c = next ();
     609         [ #  # ]:          0 :           if (c == ',')
     610                 :            :             {
     611                 :          0 :               c = peek ();
     612         [ #  # ]:          0 :               if (c == '}')
     613                 :            :                 {
     614                 :          0 :                   next ();
     615                 :          0 :                   maxsize = -1;
     616                 :            :                 }
     617         [ #  # ]:          0 :               else if (isdigit (c))
     618                 :            :                 {
     619                 :          0 :                   maxsize = parse_number ();
     620                 :          0 :                   expect ('}');
     621                 :            :                 }
     622                 :            :               else
     623 [ #  # ][ #  # ]:          0 :                 parse_error(_("expected '}' or number"), next_pos);
                 [ #  # ]
     624                 :            :             }
     625         [ #  # ]:          0 :           else if (c == '}')
     626                 :            :             {
     627                 :          0 :               maxsize = minsize;
     628                 :            :             }
     629                 :            :           else
     630 [ #  # ][ #  # ]:          0 :             parse_error(_("expected ',' or '}'"));
                 [ #  # ]
     631                 :            : 
     632                 :            :           /* optimize {0,0}, {0,} and {1,} */
     633 [ #  # ][ #  # ]:          0 :           if (minsize == 0 && maxsize == 0)
     634                 :            :             {
     635                 :            :               // TODOXXX will not be correct in the case of subgroups
     636         [ #  # ]:          0 :               delete result;
     637         [ #  # ]:          0 :               result = new NullOp();
     638                 :            :             }
     639 [ #  # ][ #  # ]:          0 :           else if (minsize == 0 && maxsize == -1)
     640                 :            :             {
     641 [ #  # ][ #  # ]:          0 :               result = mkAlt (new CloseOp(result), new NullOp());
     642                 :            :             }
     643 [ #  # ][ #  # ]:          0 :           else if (minsize == 1 && maxsize == -1)
     644                 :            :             {
     645         [ #  # ]:          0 :               result = new CloseOp(result);
     646                 :            :             }
     647                 :            :           else
     648                 :            :             {
     649         [ #  # ]:          0 :               result = new CloseVOp(result, minsize, maxsize);
     650                 :            :             }
     651                 :            :         }
     652                 :            :       
     653                 :         41 :       c = peek ();
     654                 :            :     }
     655                 :            : 
     656         [ +  + ]:        131 :   if (old_result)
     657         [ +  - ]:         12 :     result = new CatOp(old_result, result);
     658                 :            : 
     659                 :        155 :   return result;
     660                 :            : }
     661                 :            : 
     662                 :            : RegExp *
     663                 :          7 : regex_parser::parse_char_range ()
     664                 :            : {
     665         [ +  - ]:          7 :   string accumulate;
     666                 :            : 
     667                 :          7 :   bool inv = false;
     668         [ +  - ]:          7 :   char c = peek ();
     669         [ -  + ]:          7 :   if (c == '^')
     670                 :            :     {
     671                 :          0 :       inv = true;
     672         [ #  # ]:          0 :       next ();
     673         [ #  # ]:          0 :       c = peek ();
     674                 :            :     }
     675                 :            : 
     676                 :            :   // grab ']' only if it is at the very start of the class
     677         [ -  + ]:          7 :   if (c == ']')
     678                 :            :     {
     679         [ #  # ]:          0 :       accumulate.push_back (c);
     680         [ #  # ]:          0 :       next ();
     681         [ #  # ]:          0 :       c = peek ();
     682                 :            :     }
     683                 :            : 
     684                 :            :   // grab range to next ']'
     685         [ +  + ]:         35 :   while (c != ']')
     686                 :            :     {
     687         [ +  - ]:         28 :       accumulate.push_back (c);
     688         [ +  - ]:         28 :       next ();
     689         [ +  - ]:         28 :       c = peek ();
     690                 :            :     }
     691                 :            : 
     692                 :            :   // invToRE and ranToRE take this funky custom class
     693         [ +  - ]:          7 :   SubStr accumSubStr (accumulate.c_str ());
     694 [ -  + ][ #  # ]:          7 :   return inv ? sc->invToRE (accumSubStr) : sc->ranToRE (accumSubStr);
         [ +  - ][ +  - ]
         [ -  + ][ +  - ]
           [ #  #  #  # ]
     695                 :            : }
     696                 :            : 
     697                 :            : unsigned
     698                 :          0 : regex_parser::parse_number ()
     699                 :            : {
     700         [ #  # ]:          0 :   string digits;
     701                 :            : 
     702         [ #  # ]:          0 :   char c = peek ();
     703 [ #  # ][ #  # ]:          0 :   while (c && isdigit (c))
                 [ #  # ]
     704                 :            :     {
     705         [ #  # ]:          0 :       next ();
     706         [ #  # ]:          0 :       digits.push_back (c);
     707         [ #  # ]:          0 :       c = peek ();
     708                 :            :     }
     709                 :            : 
     710 [ #  # ][ #  # ]:          0 :   if (digits == "") parse_error("expected number", next_pos);
         [ #  # ][ #  # ]
                 [ #  # ]
     711                 :            : 
     712                 :            :   // TODOXXX check for overly large numbers
     713 [ #  # ][ #  # ]:          0 :   return atoi (digits.c_str ());
     714 [ +  - ][ +  - ]:       7242 : }
     715                 :            : 
     716                 :            : /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */

Generated by: LCOV version 1.9