LCOV - stap.info - /mnt/wasteland/wcohen/systemtap_write/systemtap/re2c-migrate/re2c-regex.cxx

LCOV - code coverage report

Current view:	top level - mnt/wasteland/wcohen/systemtap_write/systemtap/re2c-migrate - re2c-regex.cxx (source / functions)		Hit	Total	Coverage
Test:	stap.info	Lines:	302	528	57.2 %
Date:	2013-03-08	Functions:	42	59	71.2 %
		Branches:	176	442	39.8 %

           Branch data     Line data    Source code

       1                 :            : // -*- C++ -*-
       2                 :            : // Copyright (C) 2012-2013 Red Hat Inc.
       3                 :            : //
       4                 :            : // This file is part of systemtap, and is free software.  You can
       5                 :            : // redistribute it and/or modify it under the terms of the GNU General
       6                 :            : // Public License (GPL); either version 2, or (at your option) any
       7                 :            : // later version.
       8                 :            : //
       9                 :            : // ---
      10                 :            : //
      11                 :            : // This file incorporates code from the re2c project; please see
      12                 :            : // re2c-migrate/README for details.
      13                 :            : 
      14                 :            : /* $Id$ */
      15                 :            : #include <time.h>
      16                 :            : #include <string.h>
      17                 :            : #include <iostream>
      18                 :            : #include <iomanip>
      19                 :            : #include <cctype>
      20                 :            : 
      21                 :            : #include "re2c-globals.h"
      22                 :            : #include "re2c-regex.h"
      23                 :            : #include "re2c-dfa.h"
      24                 :            : 
      25                 :            : namespace re2c
      26                 :            : {
      27                 :            : 
      28                 :            : // moved here from globals.h
      29                 :            : 
      30                 :            : unsigned nRealChars = 256;
      31                 :            : 
      32                 :            : // ------------------------------------------------------------
      33                 :            : 
      34                 :          0 : void Symbol::ClearTable()
      35                 :            : {
      36 [ #  # ][ #  # ]:          0 :         for (SymbolTable::iterator it = symbol_table.begin(); it != symbol_table.end(); ++it)
                 [ #  # ]
      37                 :            :         {
      38 [ #  # ][ #  # ]:          0 :                 delete it->second;
                 [ #  # ]
      39                 :            :         }
      40                 :            :         
      41                 :          0 :         symbol_table.clear();
      42                 :          0 : }
      43                 :            : 
      44                 :       2414 : Symbol::SymbolTable Symbol::symbol_table;
      45                 :            : 
      46                 :          0 : Symbol *Symbol::find(const SubStr &str)
      47                 :            : {
      48         [ #  # ]:          0 :         const std::string ss(str.to_string());
      49         [ #  # ]:          0 :         SymbolTable::const_iterator it = symbol_table.find(ss);
      50                 :            : 
      51 [ #  # ][ #  # ]:          0 :         if (it == symbol_table.end())
      52                 :            :         {
      53 [ #  # ][ #  # ]:          0 :                 return (*symbol_table.insert(SymbolTable::value_type(ss, new Symbol(str))).first).second;
         [ #  # ][ #  # ]
                 [ #  # ]
      54                 :            :         }
      55                 :            :         
      56         [ #  # ]:          0 :         return (*it).second;
      57                 :            : }
      58                 :            : 
      59                 :          0 : const Ins* showIns(std::ostream &o, const Ins &i, const Ins &base)
      60                 :            : {
      61                 :          0 :         o.width(3);
      62                 :          0 :         o << &i - &base << ": ";
      63                 :            : 
      64                 :          0 :         const Ins *ret = &(&i)[1];
      65                 :            : 
      66   [ #  #  #  #  :          0 :         switch (i.i.tag)
                #  #  # ]
      67                 :            :         {
      68                 :            : 
      69                 :            :                 case CHAR:
      70                 :            :                 {
      71                 :          0 :                         o << "match ";
      72                 :            : 
      73         [ #  # ]:          0 :                         for (; ret < (Ins*) i.i.link; ++ret)
      74                 :          0 :                                 prtCh(o, ret->c.value);
      75                 :            : 
      76                 :          0 :                         break;
      77                 :            :                 }
      78                 :            : 
      79                 :            :                 case GOTO:
      80                 :          0 :                 o << "goto " << ((Ins*) i.i.link - &base);
      81                 :          0 :                 break;
      82                 :            : 
      83                 :            :                 case FORK:
      84                 :          0 :                 o << "fork " << ((Ins*) i.i.link - &base);
      85                 :          0 :                 break;
      86                 :            : 
      87                 :            :                 case INIT:
      88                 :          0 :                 o << "init " << ((Ins*) i.i.link - &base);
      89                 :          0 :                 break;
      90                 :            : 
      91                 :            :                 case CTXT:
      92                 :          0 :                 o << "ctxt";
      93                 :          0 :                 break;
      94                 :            : 
      95                 :            :                 case TERM:
      96                 :          0 :                 o << "term " << ((RuleOp*) i.i.link)->accept;
      97                 :          0 :                 break;
      98                 :            :         }
      99                 :            : 
     100                 :          0 :         o << "\n";
     101                 :          0 :         return ret;
     102                 :            : }
     103                 :            : 
     104                 :          0 : unsigned RegExp::fixedLength()
     105                 :            : {
     106                 :          0 :         return ~0u;
     107                 :            : }
     108                 :            : 
     109                 :            : const char *NullOp::type = "NullOp";
     110                 :            : 
     111                 :        112 : void NullOp::calcSize(Char*)
     112                 :            : {
     113                 :        112 :         size = 0;
     114                 :        112 : }
     115                 :            : 
     116                 :         90 : unsigned NullOp::fixedLength()
     117                 :            : {
     118                 :         90 :         return 0;
     119                 :            : }
     120                 :            : 
     121                 :         64 : void NullOp::compile(Char*, Ins*)
     122                 :            : {
     123                 :            :         ;
     124                 :         64 : }
     125                 :            : 
     126                 :        112 : void NullOp::split(CharSet&)
     127                 :            : {
     128                 :            :         ;
     129                 :        112 : }
     130                 :            : 
     131                 :          0 : std::ostream& operator<<(std::ostream &o, const Range &r)
     132                 :            : {
     133         [ #  # ]:          0 :         if ((r.ub - r.lb) == 1)
     134                 :            :         {
     135                 :          0 :                 prtCh(o, r.lb);
     136                 :            :         }
     137                 :            :         else
     138                 :            :         {
     139                 :          0 :                 prtCh(o, r.lb);
     140                 :          0 :                 o << "-";
     141                 :          0 :                 prtCh(o, r.ub - 1);
     142                 :            :         }
     143                 :            : 
     144                 :          0 :         return o << r.next;
     145                 :            : }
     146                 :            : 
     147                 :          9 : Range *doUnion(Range *r1, Range *r2)
     148                 :            : {
     149                 :          9 :         Range *r, **rP = &r;
     150                 :            : 
     151                 :         12 :         for (;;)
     152                 :            :         {
     153                 :            :                 Range *s;
     154                 :            : 
     155         [ +  + ]:         12 :                 if (r1->lb <= r2->lb)
     156                 :            :                 {
     157 [ +  - ][ +  - ]:          9 :                         s = new Range(*r1);
     158                 :            :                 }
     159                 :            :                 else
     160                 :            :                 {
     161 [ +  - ][ +  - ]:          3 :                         s = new Range(*r2);
     162                 :            :                 }
     163                 :            : 
     164                 :         12 :                 *rP = s;
     165                 :         12 :                 rP = &s->next;
     166                 :            : 
     167                 :          3 :                 for (;;)
     168                 :            :                 {
     169         [ +  + ]:         15 :                         if (r1->lb <= r2->lb)
     170                 :            :                         {
     171         [ +  + ]:         10 :                                 if (r1->lb > s->ub)
     172                 :          1 :                                         break;
     173                 :            : 
     174         [ -  + ]:          9 :                                 if (r1->ub > s->ub)
     175                 :          0 :                                         s->ub = r1->ub;
     176                 :            : 
     177         [ +  + ]:          9 :                                 if (!(r1 = r1->next))
     178                 :            :                                 {
     179                 :          6 :                                         unsigned ub = 0;
     180                 :            : 
     181 [ +  + ][ +  + ]:         10 :                                         for (; r2 && r2->lb <= s->ub; r2 = r2->next)
                 [ +  + ]
     182                 :          4 :                                                 ub = r2->ub;
     183                 :            : 
     184         [ +  + ]:          6 :                                         if (ub > s->ub)
     185                 :          4 :                                                 s->ub = ub;
     186                 :            : 
     187                 :          6 :                                         *rP = r2;
     188                 :            : 
     189                 :          6 :                                         return r;
     190                 :            :                                 }
     191                 :            :                         }
     192                 :            :                         else
     193                 :            :                         {
     194         [ +  + ]:          5 :                                 if (r2->lb > s->ub)
     195                 :          2 :                                         break;
     196                 :            : 
     197         [ -  + ]:          3 :                                 if (r2->ub > s->ub)
     198                 :          0 :                                         s->ub = r2->ub;
     199                 :            : 
     200         [ +  - ]:          3 :                                 if (!(r2 = r2->next))
     201                 :            :                                 {
     202                 :          3 :                                         unsigned ub = 0;
     203                 :            : 
     204 [ +  - ][ -  + ]:          3 :                                         for (; r1 && r1->lb <= s->ub; r1 = r1->next)
                 [ -  + ]
     205                 :          0 :                                                 ub = r1->ub;
     206                 :            : 
     207         [ -  + ]:          3 :                                         if (ub > s->ub)
     208                 :          0 :                                                 s->ub = ub;
     209                 :            : 
     210                 :          3 :                                         *rP = r1;
     211                 :            : 
     212                 :          3 :                                         return r;
     213                 :            :                                 }
     214                 :            :                         }
     215                 :            :                 }
     216                 :            :         }
     217                 :            : 
     218                 :            :         *rP = NULL;
     219                 :            :         return r;
     220                 :            : }
     221                 :            : 
     222                 :         27 : Range *doDiff(Range *r1, Range *r2)
     223                 :            : {
     224                 :         27 :         Range *r, *s, **rP = &r;
     225                 :            : 
     226         [ +  + ]:         54 :         for (; r1; r1 = r1->next)
     227                 :            :         {
     228                 :         27 :                 unsigned lb = r1->lb;
     229                 :            : 
     230 [ +  - ][ -  + ]:         27 :                 for (; r2 && r2->ub <= r1->lb; r2 = r2->next)
                 [ -  + ]
     231                 :            : 
     232                 :            :                         ;
     233 [ +  + ][ +  - ]:         54 :                 for (; r2 && r2->lb < r1->ub; r2 = r2->next)
                 [ +  + ]
     234                 :            :                 {
     235         [ -  + ]:         27 :                         if (lb < r2->lb)
     236                 :            :                         {
     237 [ #  # ][ #  # ]:          0 :                                 *rP = s = new Range(lb, r2->lb);
     238                 :          0 :                                 rP = &s->next;
     239                 :            :                         }
     240                 :            : 
     241         [ -  + ]:         27 :                         if ((lb = r2->ub) >= r1->ub)
     242                 :          0 :                                 goto noMore;
     243                 :            :                 }
     244                 :            : 
     245 [ +  - ][ +  - ]:         27 :                 *rP = s = new Range(lb, r1->ub);
     246                 :         27 :                 rP = &s->next;
     247                 :            : 
     248                 :            : noMore:
     249                 :            :                 ;
     250                 :            :         }
     251                 :            : 
     252                 :         27 :         *rP = NULL;
     253                 :         27 :         return r;
     254                 :            : }
     255                 :            : 
     256                 :         81 : MatchOp *merge(MatchOp *m1, MatchOp *m2)
     257                 :            : {
     258         [ +  + ]:         81 :         if (!m1)
     259                 :         76 :                 return m2;
     260                 :            : 
     261         [ +  + ]:          5 :         if (!m2)
     262                 :          3 :                 return m1;
     263                 :            : 
     264         [ +  - ]:         81 :         return new MatchOp(doUnion(m1->match, m2->match));
     265                 :            : }
     266                 :            : 
     267                 :            : const char *MatchOp::type = "MatchOp";
     268                 :            : 
     269                 :          0 : void MatchOp::display(std::ostream &o) const
     270                 :            : {
     271                 :          0 :         o << match;
     272                 :          0 : }
     273                 :            : 
     274                 :        158 : void MatchOp::calcSize(Char *rep)
     275                 :            : {
     276                 :        158 :         size = 1;
     277                 :            : 
     278         [ +  + ]:        321 :         for (Range *r = match; r; r = r->next)
     279         [ +  + ]:       7301 :                 for (unsigned c = r->lb; c < r->ub; ++c)
     280         [ +  + ]:       7138 :                         if (rep[c] == c)
     281                 :        277 :                                 ++size;
     282                 :        158 : }
     283                 :            : 
     284                 :          0 : unsigned MatchOp::fixedLength()
     285                 :            : {
     286                 :          0 :         return 1;
     287                 :            : }
     288                 :            : 
     289                 :        158 : void MatchOp::compile(Char *rep, Ins *i)
     290                 :            : {
     291                 :        158 :         i->i.tag = CHAR;
     292                 :        158 :         i->i.link = &i[size];
     293                 :        158 :         Ins *j = &i[1];
     294                 :        158 :         unsigned bump = size;
     295                 :            : 
     296         [ +  + ]:        321 :         for (Range *r = match; r; r = r->next)
     297                 :            :         {
     298         [ +  + ]:       7301 :                 for (unsigned c = r->lb; c < r->ub; ++c)
     299                 :            :                 {
     300         [ +  + ]:       7138 :                         if (rep[c] == c)
     301                 :            :                         {
     302                 :        277 :                                 j->c.value = c;
     303                 :        277 :                                 j->c.bump = --bump;
     304                 :        277 :                                 j++;
     305                 :            :                         }
     306                 :            :                 }
     307                 :            :         }
     308                 :        158 : }
     309                 :            : 
     310                 :        158 : void MatchOp::split(CharSet &s)
     311                 :            : {
     312         [ +  + ]:        321 :         for (Range *r = match; r; r = r->next)
     313                 :            :         {
     314         [ +  + ]:       7301 :                 for (unsigned c = r->lb; c < r->ub; ++c)
     315                 :            :                 {
     316                 :       7138 :                         CharPtn *x = s.rep[c], *a = x->nxt;
     317                 :            : 
     318         [ +  + ]:       7138 :                         if (!a)
     319                 :            :                         {
     320         [ +  + ]:        172 :                                 if (x->card == 1)
     321                 :         46 :                                         continue;
     322                 :            : 
     323                 :        126 :                                 x->nxt = a = s.freeHead;
     324                 :            : 
     325         [ -  + ]:        126 :                                 if (!(s.freeHead = s.freeHead->nxt))
     326                 :          0 :                                         s.freeTail = &s.freeHead;
     327                 :            : 
     328                 :        126 :                                 a->nxt = NULL;
     329                 :            : 
     330                 :        126 :                                 x->fix = s.fix;
     331                 :            : 
     332                 :        126 :                                 s.fix = x;
     333                 :            :                         }
     334                 :            : 
     335         [ +  + ]:       7092 :                         if (--(x->card) == 0)
     336                 :            :                         {
     337                 :          4 :                                 *s.freeTail = x;
     338                 :          4 :                                 *(s.freeTail = &x->nxt) = NULL;
     339                 :            :                         }
     340                 :            : 
     341                 :       7092 :                         s.rep[c] = a;
     342                 :       7092 :                         ++(a->card);
     343                 :            :                 }
     344                 :            :         }
     345                 :            : 
     346         [ +  + ]:        284 :         for (; s.fix; s.fix = s.fix->fix)
     347         [ +  + ]:        126 :                 if (s.fix->card)
     348                 :        122 :                         s.fix->nxt = NULL;
     349                 :        158 : }
     350                 :            : 
     351                 :            : const char *AnchorOp::type = "AnchorOp";
     352                 :            : 
     353                 :          7 : void AnchorOp::calcSize(Char *rep)
     354                 :            : {
     355         [ +  + ]:          7 :   size = atype == '^' ? 1 : 2;
     356                 :          7 : }
     357                 :            : 
     358                 :          0 : unsigned AnchorOp::fixedLength()
     359                 :            : {
     360                 :          0 :   return 0;
     361                 :            : }
     362                 :            : 
     363                 :          7 : void AnchorOp::compile(Char *rep, Ins *i)
     364                 :            : {
     365         [ +  + ]:          7 :   if (atype == '^')
     366                 :            :     {
     367                 :          2 :       i->i.tag = INIT;
     368                 :          2 :       i->i.link = &i[1];
     369                 :            :     }
     370                 :            :   else // atype == '$'
     371                 :            :     {
     372                 :          5 :       i->i.tag = CHAR;
     373                 :          5 :       i->i.link = &i[2];
     374                 :          5 :       Ins *j = &i[1];
     375                 :          5 :       j->c.value = '\0';
     376                 :          5 :       j->c.bump = 1;
     377                 :            :     }
     378                 :          7 : }
     379                 :            : 
     380                 :          7 : void AnchorOp::split(CharSet &s)
     381                 :            : {
     382                 :            :   ;
     383                 :          7 : }
     384                 :            : 
     385                 :         27 : RegExp * mkDiff(RegExp *e1, RegExp *e2)
     386                 :            : {
     387                 :            :         MatchOp *m1, *m2;
     388                 :            : 
     389         [ -  + ]:         27 :         if (!(m1 = (MatchOp*) e1->isA(MatchOp::type)))
     390                 :          0 :                 return NULL;
     391                 :            : 
     392         [ -  + ]:         27 :         if (!(m2 = (MatchOp*) e2->isA(MatchOp::type)))
     393                 :          0 :                 return NULL;
     394                 :            : 
     395                 :         27 :         Range *r = doDiff(m1->match, m2->match);
     396                 :            : 
     397 [ +  - ][ +  - ]:         27 :         return r ? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp;
                 [ #  # ]
     398                 :            : }
     399                 :            : 
     400                 :        162 : RegExp *doAlt(RegExp *e1, RegExp *e2)
     401                 :            : {
     402         [ +  + ]:        162 :         if (!e1)
     403                 :         79 :                 return e2;
     404                 :            : 
     405         [ +  + ]:         83 :         if (!e2)
     406                 :          4 :                 return e1;
     407                 :            : 
     408         [ +  - ]:        162 :         return new AltOp(e1, e2);
     409                 :            : }
     410                 :            : 
     411                 :         81 : RegExp *mkAlt(RegExp *e1, RegExp *e2)
     412                 :            : {
     413                 :            :   AltOp *a;
     414                 :            :   RegExp *r;
     415                 :            :         MatchOp *m1, *m2;
     416                 :            : 
     417                 :            :         /* preserve correct anchoring through optimizations: */
     418 [ -  + ][ #  # ]:         81 :         bool anchored = e1->anchored && e2->anchored;
     419                 :            : 
     420         [ +  + ]:         81 :         if ((a = (AltOp*) e1->isA(AltOp::type)))
     421                 :            :         {
     422         [ -  + ]:          8 :                 if ((m1 = (MatchOp*) a->exp1->isA(MatchOp::type)))
     423                 :          0 :                         e1 = a->exp2;
     424                 :            :         }
     425         [ +  + ]:         73 :         else if ((m1 = (MatchOp*) e1->isA(MatchOp::type)))
     426                 :            :         {
     427                 :          5 :                 e1 = NULL;
     428                 :            :         }
     429                 :            : 
     430         [ -  + ]:         81 :         if ((a = (AltOp*) e2->isA(AltOp::type)))
     431                 :            :         {
     432         [ #  # ]:          0 :                 if ((m2 = (MatchOp*) a->exp1->isA(MatchOp::type)))
     433                 :          0 :                         e2 = a->exp2;
     434                 :            :         }
     435         [ +  + ]:         81 :         else if ((m2 = (MatchOp*) e2->isA(MatchOp::type)))
     436                 :            :         {
     437                 :          4 :                 e2 = NULL;
     438                 :            :         }
     439                 :            : 
     440                 :         81 :         r = doAlt(merge(m1, m2), doAlt(e1, e2));
     441                 :         81 :         r->anchored = anchored;
     442                 :         81 :         return r;
     443                 :            : }
     444                 :            : 
     445                 :            : const char *AltOp::type = "AltOp";
     446                 :            : 
     447                 :         79 : void AltOp::calcSize(Char *rep)
     448                 :            : {
     449                 :         79 :         exp1->calcSize(rep);
     450                 :         79 :         exp2->calcSize(rep);
     451                 :         79 :         size = exp1->size + exp2->size + 2;
     452                 :         79 : }
     453                 :            : 
     454                 :          0 : unsigned AltOp::fixedLength()
     455                 :            : {
     456                 :          0 :         unsigned l1 = exp1->fixedLength();
     457                 :          0 :         unsigned l2 = exp1->fixedLength();
     458                 :            : 
     459 [ #  # ][ #  # ]:          0 :         if (l1 != l2 || l1 == ~0u)
     460                 :          0 :                 return ~0u;
     461                 :            : 
     462                 :          0 :         return l1;
     463                 :            : }
     464                 :            : 
     465                 :         79 : void AltOp::compile(Char *rep, Ins *i)
     466                 :            : {
     467                 :         79 :         i->i.tag = FORK;
     468                 :         79 :         Ins *j = &i[exp1->size + 1];
     469                 :         79 :         i->i.link = &j[1];
     470                 :         79 :         exp1->compile(rep, &i[1]);
     471                 :         79 :         j->i.tag = GOTO;
     472                 :         79 :         j->i.link = &j[exp2->size + 1];
     473                 :         79 :         exp2->compile(rep, &j[1]);
     474                 :         79 : }
     475                 :            : 
     476                 :         79 : void AltOp::split(CharSet &s)
     477                 :            : {
     478                 :         79 :         exp1->split(s);
     479                 :         79 :         exp2->split(s);
     480                 :         79 : }
     481                 :            : 
     482                 :            : const char *CatOp::type = "CatOp";
     483                 :            : 
     484                 :        126 : void CatOp::calcSize(Char *rep)
     485                 :            : {
     486                 :        126 :         exp1->calcSize(rep);
     487                 :        126 :         exp2->calcSize(rep);
     488                 :        126 :         size = exp1->size + exp2->size;
     489                 :        126 : }
     490                 :            : 
     491                 :          0 : unsigned CatOp::fixedLength()
     492                 :            : {
     493                 :            :         unsigned l1, l2;
     494                 :            : 
     495         [ #  # ]:          0 :         if ((l1 = exp1->fixedLength()) != ~0u )
     496         [ #  # ]:          0 :                 if ((l2 = exp2->fixedLength()) != ~0u)
     497                 :          0 :                         return l1 + l2;
     498                 :            : 
     499                 :          0 :         return ~0u;
     500                 :            : }
     501                 :            : 
     502                 :        126 : void CatOp::compile(Char *rep, Ins *i)
     503                 :            : {
     504                 :        126 :         exp1->compile(rep, &i[0]);
     505                 :        126 :         exp2->compile(rep, &i[exp1->size]);
     506                 :        126 : }
     507                 :            : 
     508                 :        126 : void CatOp::split(CharSet &s)
     509                 :            : {
     510                 :        126 :         exp1->split(s);
     511                 :        126 :         exp2->split(s);
     512                 :        126 : }
     513                 :            : 
     514                 :            : const char *CloseOp::type = "CloseOp";
     515                 :            : 
     516                 :         38 : void CloseOp::calcSize(Char *rep)
     517                 :            : {
     518                 :         38 :         exp->calcSize(rep);
     519                 :         38 :         size = exp->size + 1;
     520                 :         38 : }
     521                 :            : 
     522                 :         38 : void CloseOp::compile(Char *rep, Ins *i)
     523                 :            : {
     524                 :         38 :         exp->compile(rep, &i[0]);
     525                 :         38 :         i += exp->size;
     526                 :         38 :         i->i.tag = FORK;
     527                 :         38 :         i->i.link = i - exp->size;
     528                 :         38 : }
     529                 :            : 
     530                 :         38 : void CloseOp::split(CharSet &s)
     531                 :            : {
     532                 :         38 :         exp->split(s);
     533                 :         38 : }
     534                 :            : 
     535                 :            : const char *CloseVOp::type = "CloseVOp";
     536                 :            : 
     537                 :          0 : void CloseVOp::calcSize(Char *rep)
     538                 :            : {
     539                 :          0 :         exp->calcSize(rep);
     540                 :            : 
     541         [ #  # ]:          0 :         if (max >= 0)
     542                 :            :         {
     543                 :          0 :                 size = (exp->size * min) + ((1 + exp->size) * (max - min));
     544                 :            :         }
     545                 :            :         else
     546                 :            :         {
     547                 :          0 :                 size = (exp->size * min) + 1;
     548                 :            :         }
     549                 :          0 : }
     550                 :            : 
     551                 :          0 : void CloseVOp::compile(Char *rep, Ins *i)
     552                 :            : {
     553                 :            :         Ins *jumppoint;
     554                 :            :         int st;
     555                 :          0 :         jumppoint = i + ((1 + exp->size) * (max - min));
     556                 :            : 
     557         [ #  # ]:          0 :         for (st = min; st < max; st++)
     558                 :            :         {
     559                 :          0 :                 i->i.tag = FORK;
     560                 :          0 :                 i->i.link = jumppoint;
     561                 :          0 :                 i++;
     562                 :          0 :                 exp->compile(rep, &i[0]);
     563                 :          0 :                 i += exp->size;
     564                 :            :         }
     565                 :            : 
     566         [ #  # ]:          0 :         for (st = 0; st < min; st++)
     567                 :            :         {
     568                 :          0 :                 exp->compile(rep, &i[0]);
     569                 :          0 :                 i += exp->size;
     570                 :            : 
     571 [ #  # ][ #  # ]:          0 :                 if (max < 0 && st == 0)
     572                 :            :                 {
     573                 :          0 :                         i->i.tag = FORK;
     574                 :          0 :                         i->i.link = i - exp->size;
     575                 :          0 :                         i++;
     576                 :            :                 }
     577                 :            :         }
     578                 :          0 : }
     579                 :            : 
     580                 :          0 : void CloseVOp::split(CharSet &s)
     581                 :            : {
     582                 :          0 :         exp->split(s);
     583                 :          0 : }
     584                 :            : 
     585                 :            : RegExp *expr(Scanner &);
     586                 :            : 
     587                 :        147 : unsigned Scanner::unescape(SubStr &s) const
     588                 :            : {
     589                 :            :         static const char * hex = "0123456789abcdef";
     590                 :            :         static const char * oct = "01234567";
     591                 :            : 
     592                 :        147 :         s.len--;
     593                 :        147 :         unsigned c, ucb = 0;
     594                 :            : 
     595 [ +  + ][ +  - ]:        147 :         if ((c = *s.str++) != '\\' || s.len == 0)
                 [ +  - ]
     596                 :            :         {
     597                 :        147 :                 return c;
     598                 :            :         }
     599                 :            : 
     600                 :          0 :         s.len--;
     601                 :            : 
     602   [ #  #  #  #  :          0 :         switch (c = *s.str++)
          #  #  #  #  #  
             #  #  #  # ]
     603                 :            :         {
     604                 :          0 :                 case 'n': return '\n';
     605                 :          0 :                 case 't': return '\t';
     606                 :          0 :                 case 'v': return '\v';
     607                 :          0 :                 case 'b': return '\b';
     608                 :          0 :                 case 'r': return '\r';
     609                 :          0 :                 case 'f': return '\f';
     610                 :          0 :                 case 'a': return '\a';
     611                 :            : 
     612                 :            :                 case 'x':
     613                 :            :                 {
     614         [ #  # ]:          0 :                         if (s.len < 2)
     615                 :            :                         {
     616                 :          0 :                                 fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required");
     617                 :          0 :                                 return ~0u;
     618                 :            :                         }
     619                 :            :                         
     620                 :          0 :                         const char *p1 = strchr(hex, tolower(s.str[0]));
     621                 :          0 :                         const char *p2 = strchr(hex, tolower(s.str[1]));
     622                 :            : 
     623 [ #  # ][ #  # ]:          0 :                         if (!p1 || !p2)
     624                 :            :                         {
     625         [ #  # ]:          0 :                                 fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code");
     626                 :          0 :                                 return ~0u;
     627                 :            :                         }
     628                 :            :                         else
     629                 :            :                         {
     630                 :          0 :                                 s.len -= 2;
     631                 :          0 :                                 s.str += 2;
     632                 :            :                                 
     633                 :            :                                 unsigned v = (unsigned)((p1 - hex) << 4) 
     634                 :          0 :                                        + (unsigned)((p2 - hex));
     635                 :            :         
     636                 :          0 :                                 return v;
     637                 :            :                         }
     638                 :            :                 }
     639                 :            : 
     640                 :            :                 case 'U':
     641                 :            :                 {
     642         [ #  # ]:          0 :                         if (s.len < 8)
     643                 :            :                         {
     644                 :          0 :                                 fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required");
     645                 :          0 :                                 return ~0u;
     646                 :            :                         }
     647                 :            : 
     648                 :          0 :                         unsigned l = 0;
     649                 :            :                                                 
     650         [ #  # ]:          0 :                         if (s.str[0] == '0')
     651                 :            :                         {
     652                 :          0 :                                 l++;
     653         [ #  # ]:          0 :                                 if (s.str[1] == '0')
     654                 :            :                                 {
     655                 :          0 :                                         l++;
     656 [ #  # ][ #  # ]:          0 :                                         if (s.str[2] == '0' || (s.str[2] == '1' && uFlag))
                 [ #  # ]
     657                 :            :                                         {
     658                 :          0 :                                                 l++;
     659         [ #  # ]:          0 :                                                 if (uFlag) {
     660                 :          0 :                                                         const char *u3 = strchr(hex, tolower(s.str[2]));
     661                 :          0 :                                                         const char *u4 = strchr(hex, tolower(s.str[3]));
     662 [ #  # ][ #  # ]:          0 :                                                         if (u3 && u4)
     663                 :            :                                                         {
     664                 :            :                                                                 ucb = (unsigned)((u3 - hex) << 20)
     665                 :          0 :                                                                 + (unsigned)((u4 - hex) << 16);
     666                 :          0 :                                                                 l++;
     667                 :            :                                                         }
     668                 :            :                                                 }
     669         [ #  # ]:          0 :                                                 else if (s.str[3] == '0')
     670                 :            :                                                 {
     671                 :          0 :                                                         l++;
     672                 :            :                                                 }
     673                 :            :                                         }
     674                 :            :                                 }
     675                 :            :                         }
     676                 :            : 
     677         [ #  # ]:          0 :                         if (l != 4)
     678                 :            :                         {
     679                 :          0 :                                 fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required");
     680                 :            :                         }
     681                 :            : 
     682                 :          0 :                         s.len -= 4;
     683                 :          0 :                         s.str += 4;
     684                 :            :                         
     685                 :            :                         // no break;
     686                 :            :                 }
     687                 :            :                 case 'X':
     688                 :            :                 case 'u':
     689                 :            :                 {
     690         [ #  # ]:          0 :                         if (s.len < 4)
     691                 :            :                         {
     692                 :          0 :                                 fatal(s.ofs()+s.len, 
     693                 :            :                                         c == 'X'
     694                 :            :                                         ? "Illegal hexadecimal character code, four hexadecimal digits are required"
     695         [ #  # ]:          0 :                                         : "Illegal unicode character, four hexadecimal digits are required");
     696                 :          0 :                                 return ~0u;
     697                 :            :                         }
     698                 :            :                         
     699                 :          0 :                         const char *p1 = strchr(hex, tolower(s.str[0]));
     700                 :          0 :                         const char *p2 = strchr(hex, tolower(s.str[1]));
     701                 :          0 :                         const char *p3 = strchr(hex, tolower(s.str[2]));
     702                 :          0 :                         const char *p4 = strchr(hex, tolower(s.str[3]));
     703                 :            : 
     704 [ #  # ][ #  # ]:          0 :                         if (!p1 || !p2 || !p3 || !p4)
         [ #  # ][ #  # ]
     705                 :            :                         {
     706                 :          0 :                                 fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), 
     707                 :            :                                         c == 'X'
     708                 :            :                                         ? "Illegal hexadecimal character code, non hexxdecimal digit found"
     709 [ #  # ][ #  # ]:          0 :                                         : "Illegal unicode character, non hexadecimal digit found");
         [ #  # ][ #  # ]
     710                 :          0 :                                 return ~0u;
     711                 :            :                         }
     712                 :            :                         else
     713                 :            :                         {
     714                 :          0 :                                 s.len -= 4;
     715                 :          0 :                                 s.str += 4;
     716                 :            :                                 
     717                 :            :                                 unsigned v = (unsigned)((p1 - hex) << 12) 
     718                 :            :                                        + (unsigned)((p2 - hex) <<  8)
     719                 :            :                                        + (unsigned)((p3 - hex) <<  4)
     720                 :            :                                        + (unsigned)((p4 - hex))
     721                 :          0 :                                        + ucb;
     722                 :            :         
     723         [ #  # ]:          0 :                                 if (v >= nRealChars)
     724                 :            :                                 {
     725                 :            :                                         fatal(s.ofs(),
     726                 :            :                                                 c == 'X'
     727                 :            :                                                 ? "Illegal hexadecimal character code, out of range"
     728         [ #  # ]:          0 :                                                 : "Illegal unicode character, out of range");
     729                 :            :                                 }
     730                 :            :         
     731                 :          0 :                                 return v;
     732                 :            :                         }
     733                 :            :                 }
     734                 :            : 
     735                 :            :                 case '4':
     736                 :            :                 case '5':
     737                 :            :                 case '6':
     738                 :            :                 case '7':
     739                 :            :                 {
     740                 :          0 :                         fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3");
     741                 :          0 :                         return ~0u;
     742                 :            :                 }
     743                 :            : 
     744                 :            :                 case '0':
     745                 :            :                 case '1':
     746                 :            :                 case '2':
     747                 :            :                 case '3':
     748                 :            :                 {
     749         [ #  # ]:          0 :                         if (s.len < 2)
     750                 :            :                         {
     751                 :          0 :                                 fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required");
     752                 :          0 :                                 return ~0u;
     753                 :            :                         }
     754                 :            : 
     755                 :          0 :                         const char *p0 = strchr(oct, c);
     756                 :          0 :                         const char *p1 = strchr(oct, s.str[0]);
     757                 :          0 :                         const char *p2 = strchr(oct, s.str[1]);
     758                 :            : 
     759 [ #  # ][ #  # ]:          0 :                         if (!p0 || !p1 || !p2)
                 [ #  # ]
     760                 :            :                         {
     761         [ #  # ]:          0 :                                 fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found");
     762                 :          0 :                                 return ~0u;
     763                 :            :                         }
     764                 :            :                         else
     765                 :            :                         {
     766                 :          0 :                                 s.len -= 2;
     767                 :          0 :                                 s.str += 2;
     768                 :            :                                 
     769                 :          0 :                                 unsigned v = (unsigned)((p0 - oct) << 6) + (unsigned)((p1 - oct) << 3) + (unsigned)(p2 - oct);
     770                 :            :         
     771                 :          0 :                                 return v;
     772                 :            :                         }
     773                 :            :                 }
     774                 :            : 
     775                 :            :                 default:
     776                 :        147 :                 return c;
     777                 :            :         }
     778                 :            : }
     779                 :            : 
     780                 :          0 : std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const
     781                 :            : {
     782                 :          0 :         str_out.clear();
     783                 :            : 
     784         [ #  # ]:          0 :         while(str_in.len)
     785                 :            :         {
     786                 :          0 :                 unsigned c = unescape(str_in);
     787                 :            : 
     788         [ #  # ]:          0 :                 if (c > 0xFF)
     789                 :            :                 {
     790                 :          0 :                         fatal(str_in.ofs(), "Illegal character");
     791                 :            :                 }
     792                 :            : 
     793                 :          0 :                 str_out += static_cast<char>(c);
     794                 :            :         }
     795                 :            : 
     796                 :          0 :         return str_out;
     797                 :            : }
     798                 :            : 
     799                 :         14 : Range * Scanner::getRange(SubStr &s) const
     800                 :            : {
     801                 :            :         // unsigned lb = unescape(s), ub, xlb, xub, c;
     802                 :         14 :         unsigned lb = unescape(s), ub, xlb, c;
     803                 :            : 
     804 [ +  + ][ +  + ]:         14 :         if (s.len < 2 || *s.str != '-')
     805                 :            :         {
     806                 :          7 :                 ub = lb;
     807                 :            :         }
     808                 :            :         else
     809                 :            :         {
     810                 :          7 :                 s.len--;
     811                 :          7 :                 s.str++;
     812                 :          7 :                 ub = unescape(s);
     813                 :            : 
     814         [ -  + ]:          7 :                 if (ub < lb)
     815                 :            :                 {
     816                 :          0 :                         unsigned tmp = lb;
     817                 :          0 :                         lb = ub;
     818                 :          0 :                         ub = tmp;
     819                 :            :                 }
     820                 :            :                 
     821                 :          7 :                 xlb = xlat(lb);
     822                 :            :                 // xub = xlat(ub);
     823                 :            :                 
     824         [ +  + ]:        127 :                 for(c = lb; c <= ub; c++)
     825                 :            :                 {
     826 [ +  - ][ -  + ]:        120 :                         if (!(xlb <= xlat(c) && xlat(c) <= ub))
                 [ -  + ]
     827                 :            :                         {
     828                 :            :                                 /* range doesn't work */
     829         [ #  # ]:          0 :                                 Range * r = new Range(xlb, xlb + 1);
     830         [ #  # ]:          0 :                                 for (c = lb + 1; c <= ub; c++)
     831                 :            :                                 {
     832         [ #  # ]:          0 :                                         r = doUnion(r, new Range(xlat(c), xlat(c) + 1));
     833                 :            :                                 }
     834                 :          0 :                                 return r;
     835                 :            :                         }
     836                 :            :                 }
     837                 :            :         }
     838                 :            : 
     839         [ +  - ]:         14 :         return new Range(xlat(lb), xlat(ub) + 1);
     840                 :            : }
     841                 :            : 
     842                 :        153 : RegExp * Scanner::matchChar(unsigned c) const
     843                 :            : {
     844                 :        153 :         unsigned xc = xlat(c);
     845 [ +  - ][ +  - ]:        153 :         return new MatchOp(new Range(xc, xc + 1));
     846                 :            : }
     847                 :            : 
     848                 :         77 : RegExp * Scanner::strToRE(SubStr s) const
     849                 :            : {
     850                 :            :         // strToRE used to take quoted literal,
     851                 :            :         // but the quote-stripping is no longer needed
     852                 :            :         //s.len -= 2;
     853                 :            :         //s.str += 1;
     854                 :            : 
     855         [ +  + ]:         77 :         if (s.len == 0)
     856         [ +  - ]:          4 :                 return new NullOp;
     857                 :            : 
     858                 :         73 :         RegExp *re = matchChar(unescape(s));
     859                 :            : 
     860         [ +  + ]:        126 :         while (s.len > 0)
     861         [ +  - ]:         53 :                 re = new CatOp(re, matchChar(unescape(s)));
     862                 :            : 
     863                 :         77 :         return re;
     864                 :            : }
     865                 :            : 
     866                 :          0 : RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const
     867                 :            : {
     868                 :          0 :         s.len -= 2;
     869                 :          0 :         s.str += 1;
     870                 :            : 
     871         [ #  # ]:          0 :         if (s.len == 0)
     872         [ #  # ]:          0 :                 return new NullOp;
     873                 :            : 
     874                 :          0 :         unsigned c = unescape(s);
     875                 :            : 
     876                 :            :         RegExp *re, *reL, *reU;
     877                 :            : 
     878 [ #  # ][ #  # ]:          0 :         if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
         [ #  # ][ #  # ]
     879                 :            :         {
     880                 :          0 :                 reL = matchChar(tolower(c));
     881                 :          0 :                 reU = matchChar(toupper(c));
     882                 :          0 :                 re = mkAlt(reL, reU);
     883                 :            :         }
     884                 :            :         else
     885                 :            :         {
     886                 :          0 :                 re = matchChar(c);
     887                 :            :         }
     888                 :            : 
     889         [ #  # ]:          0 :         while (s.len > 0)
     890                 :            :         {
     891                 :          0 :                 unsigned c = unescape(s);
     892                 :            : 
     893 [ #  # ][ #  # ]:          0 :                 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
         [ #  # ][ #  # ]
     894                 :            :                 {
     895                 :          0 :                         reL = matchChar(tolower(c));
     896                 :          0 :                         reU = matchChar(toupper(c));
     897         [ #  # ]:          0 :                         re = new CatOp(re, mkAlt(reL, reU));
     898                 :            :                 }
     899                 :            :                 else
     900                 :            :                 {
     901         [ #  # ]:          0 :                         re = new CatOp(re, matchChar(c));
     902                 :            :                 }
     903                 :            :         }
     904                 :            : 
     905                 :          0 :         return re;
     906                 :            : }
     907                 :            : 
     908                 :          7 : RegExp * Scanner::ranToRE(SubStr s) const
     909                 :            : {
     910                 :            :         // ranToRE et al. used to take bracketed literal,
     911                 :            :         // but the bracket-stripping is no longer needed
     912                 :            :         //s.len -= 2;
     913                 :            :         //s.str += 1;
     914                 :            : 
     915         [ -  + ]:          7 :         if (s.len == 0)
     916         [ #  # ]:          0 :                 return new NullOp;
     917                 :            : 
     918                 :          7 :         Range *r = getRange(s);
     919                 :            : 
     920         [ +  + ]:         14 :         while (s.len > 0)
     921                 :          7 :                 r = doUnion(r, getRange(s));
     922                 :            : 
     923         [ +  - ]:          7 :         return new MatchOp(r);
     924                 :            : }
     925                 :            : 
     926                 :         27 : RegExp * Scanner::getAnyRE() const
     927                 :            : {
     928         [ -  + ]:         27 :         if (eFlag)
     929                 :            :         {
     930         [ #  # ]:          0 :                 return ranToRE(SubStr("[\\000-\\377]"));
     931                 :            :         }
     932                 :            :         else
     933                 :            :         {
     934 [ +  - ][ +  - ]:         27 :                 return new MatchOp(new Range(0, nRealChars));
     935                 :            :         }
     936                 :            : }
     937                 :            : 
     938                 :          0 : RegExp * Scanner::invToRE(SubStr s) const
     939                 :            : {
     940                 :            :         // ranToRE et al. used to take bracketed literal,
     941                 :            :         // but the bracket-stripping is no longer needed
     942                 :            :         //s.len--;
     943                 :            :         //s.str++;
     944                 :            :         
     945                 :          0 :         RegExp * any = getAnyRE();
     946                 :            : 
     947                 :            :         // TODOXXX this was probably erroneous
     948                 :            :         // if (s.len <= 2)
     949                 :            :         // {
     950                 :            :         //      return any;
     951                 :            :         // }
     952                 :            : 
     953         [ #  # ]:          0 :         RegExp * ran = ranToRE(s);
     954                 :          0 :         RegExp * inv = mkDiff(any, ran);
     955                 :            :         
     956         [ #  # ]:          0 :         delete ran;
     957         [ #  # ]:          0 :         delete any;
     958                 :            :         
     959                 :          0 :         return inv;
     960                 :            : }
     961                 :            : 
     962                 :         27 : RegExp * Scanner::mkDot() const
     963                 :            : {
     964                 :         27 :         RegExp * any = getAnyRE();
     965                 :            :         // TODOXXX we want to avoid having .* &such overrun the end
     966                 :            :         // TODOXXX is this the only place '\n' is hardcoded?
     967                 :            :         // TODOXXX what about actually respecting '\n' in POSIX ERE?
     968                 :            :         // RegExp * ran = matchChar('\n'); // TODOXXX may want to keep this?
     969                 :         27 :         RegExp * ran = matchChar('\0');
     970                 :         27 :         RegExp * inv = mkDiff(any, ran);
     971                 :            :         
     972         [ +  - ]:         27 :         delete ran;
     973         [ +  - ]:         27 :         delete any;
     974                 :            :         
     975                 :         27 :         return inv;
     976                 :            : }
     977                 :            : 
     978                 :            : const char *RuleOp::type = "RuleOp";
     979                 :            : 
     980                 :         48 : RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, unsigned a)
     981                 :            :         : exp(e)
     982                 :            :         , ctx(c)
     983                 :            :         , ins(NULL)
     984                 :            :         , accept(a)
     985                 :            :         , code(t)
     986                 :         48 :         , line(0)
     987                 :            : {
     988                 :         48 :         anchored = e->anchored;
     989                 :         48 : }
     990                 :            : 
     991                 :          0 : RuleOp* RuleOp::copy(unsigned a) const
     992                 :            : {
     993         [ #  # ]:          0 :         Token *token = new Token(*code);
     994         [ #  # ]:          0 :         return new RuleOp(exp, ctx, token, a);
     995                 :            : }
     996                 :            : 
     997                 :         48 : void RuleOp::calcSize(Char *rep)
     998                 :            : {
     999                 :         48 :         exp->calcSize(rep);
    1000                 :         48 :         ctx->calcSize(rep);
    1001         [ -  + ]:         48 :         size = exp->size + (ctx->size ? ctx->size + 2 : 1);
    1002                 :         48 : }
    1003                 :            : 
    1004                 :         48 : void RuleOp::compile(Char *rep, Ins *i)
    1005                 :            : {
    1006                 :         48 :         ins = i;
    1007                 :         48 :         exp->compile(rep, &i[0]);
    1008                 :         48 :         i += exp->size;
    1009         [ -  + ]:         48 :         if (ctx->size)
    1010                 :            :         {
    1011                 :          0 :                 i->i.tag = CTXT;
    1012                 :          0 :                 i->i.link = &i[1];
    1013                 :          0 :                 i++;
    1014                 :          0 :                 ctx->compile(rep, &i[0]);
    1015                 :          0 :                 i += ctx->size;
    1016                 :            :         }
    1017                 :         48 :         i->i.tag = TERM;
    1018                 :         48 :         i->i.link = this;
    1019                 :         48 : }
    1020                 :            : 
    1021                 :         48 : void RuleOp::split(CharSet &s)
    1022                 :            : {
    1023                 :         48 :         exp->split(s);
    1024                 :         48 :         ctx->split(s);
    1025                 :         48 : }
    1026                 :            : 
    1027                 :        361 : void optimize(Ins *i)
    1028                 :            : {
    1029         [ +  + ]:        574 :         while (!isMarked(i))
    1030                 :            :         {
    1031                 :        433 :                 mark(i);
    1032                 :            : 
    1033         [ +  + ]:        433 :                 if (i->i.tag == CHAR)
    1034                 :            :                 {
    1035                 :        163 :                         i = (Ins*) i->i.link;
    1036                 :            :                 }
    1037 [ +  + ][ +  + ]:        270 :                 else if (i->i.tag == GOTO || i->i.tag == FORK)
    1038                 :            :                 {
    1039                 :        220 :                         Ins *target = (Ins*) i->i.link;
    1040                 :        220 :                         optimize(target);
    1041                 :            : 
    1042         [ +  + ]:        220 :                         if (target->i.tag == GOTO)
    1043         [ +  + ]:         58 :                                 i->i.link = target->i.link == target ? i : target;
    1044                 :            : 
    1045         [ +  + ]:        220 :                         if (i->i.tag == FORK)
    1046                 :            :                         {
    1047                 :        117 :                                 Ins *follow = (Ins*) & i[1];
    1048                 :        117 :                                 optimize(follow);
    1049                 :            : 
    1050 [ +  + ][ -  + ]:        117 :                                 if (follow->i.tag == GOTO && follow->i.link == follow)
    1051                 :            :                                 {
    1052                 :          0 :                                         i->i.tag = GOTO;
    1053                 :            :                                 }
    1054         [ -  + ]:        117 :                                 else if (i->i.link == i)
    1055                 :            :                                 {
    1056                 :          0 :                                         i->i.tag = GOTO;
    1057                 :        117 :                                         i->i.link = follow;
    1058                 :            :                                 }
    1059                 :            :                         }
    1060                 :            : 
    1061                 :        361 :                         return ;
    1062                 :            :                 }
    1063                 :            :                 else
    1064                 :            :                 {
    1065                 :         50 :                         ++i;
    1066                 :            :                 }
    1067                 :            :         }
    1068                 :            : }
    1069                 :            : 
    1070                 :         24 : CharSet::CharSet()
    1071                 :            :         : fix(0)
    1072                 :            :         , freeHead(0)
    1073                 :            :         , freeTail(0)
    1074         [ +  - ]:         24 :         , rep(new CharPtr[nRealChars])
    1075         [ +  - ]:         24 :         , ptn(new CharPtn[nRealChars])
    1076                 :            : {
    1077         [ +  + ]:       6168 :         for (unsigned j = 0; j < nRealChars; ++j)
    1078                 :            :         {
    1079                 :       6144 :                 rep[j] = &ptn[0];
    1080                 :       6144 :                 ptn[j].nxt = &ptn[j + 1]; /* wrong for j=nRealChars but will be corrected below */
    1081                 :       6144 :                 ptn[j].card = 0;
    1082                 :            :         }
    1083                 :            : 
    1084                 :         24 :         freeHead = &ptn[1];
    1085                 :         24 :         *(freeTail = &ptn[nRealChars - 1].nxt) = NULL;
    1086                 :         24 :         ptn[0].card = nRealChars;
    1087                 :         24 :         ptn[0].nxt = NULL;
    1088                 :         24 : }
    1089                 :            :         
    1090                 :         24 : CharSet::~CharSet()
    1091                 :            : {
    1092         [ +  - ]:         24 :         delete[] rep;
    1093         [ +  - ]:         24 :         delete[] ptn;
    1094                 :         24 : }
    1095                 :            : 
    1096                 :            : // #define RE2C_CGDEBUG
    1097                 :            : 
    1098                 :         24 : DFA* genCode(RegExp *re)
    1099                 :            : {
    1100         [ +  - ]:         24 :         CharSet cs;
    1101                 :            :         unsigned j;
    1102                 :            : 
    1103         [ +  - ]:         24 :         re->split(cs);
    1104                 :            : #ifdef RE2C_CGDEBUG
    1105                 :            :         std::cerr << std::endl << "CS.REP" << std::endl; 
    1106                 :            :             for(unsigned k = 0; k < nRealChars;){
    1107                 :            :                 for(j = k; ++k < nRealChars && cs.rep[k] == cs.rep[j];);
    1108                 :            :                 printSpan(std::cerr, j, k);
    1109                 :            :                 std::cerr << "\t" << cs.rep[j] - &cs.ptn[0] << std::endl;
    1110                 :            :             }
    1111                 :            : #endif
    1112 [ +  - ][ +  - ]:         24 :         Char *rep = new Char[nRealChars];
    1113                 :            : 
    1114         [ +  + ]:       6168 :         for (j = 0; j < nRealChars; ++j)
    1115                 :            :         {
    1116         [ +  + ]:       6144 :                 if (!cs.rep[j]->nxt)
    1117                 :        146 :                         cs.rep[j]->nxt = &cs.ptn[j];
    1118                 :            : 
    1119                 :       6144 :                 rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
    1120                 :            :         }
    1121                 :            : 
    1122         [ +  - ]:         24 :         re->calcSize(rep);
    1123 [ +  - ][ +  - ]:         24 :         Ins *ins = new Ins[re->size + 1];
    1124                 :         24 :         memset(ins, 0, (re->size + 1)*sizeof(Ins));
    1125         [ +  - ]:         24 :         re->compile(rep, ins);
    1126                 :            : 
    1127                 :            : #ifdef RE2C_CGDEBUG
    1128                 :            :         std::cerr << std::endl << "BEFORE EOI" << std::endl;
    1129                 :            :         for (const Ins *inst = &ins[0]; inst < &ins[re->size]; ) {
    1130                 :            :                 inst = showIns(std::cout, *inst, ins[0]);
    1131                 :            :         }
    1132                 :            : #endif
    1133                 :            : 
    1134                 :         24 :         Ins *eoi = &ins[re->size];
    1135                 :         24 :         eoi->i.tag = GOTO;
    1136                 :         24 :         eoi->i.link = eoi;
    1137                 :            : 
    1138                 :            : #ifdef RE2C_CGDEBUG
    1139                 :            :         std::cerr << std::endl << "BEFORE OPT" << std::endl;
    1140                 :            :         for (const Ins *inst = &ins[0]; inst <= &ins[re->size]; ) {
    1141                 :            :                 inst = showIns(std::cout, *inst, ins[0]);
    1142                 :            :         }
    1143                 :            : #endif
    1144                 :            : 
    1145         [ +  - ]:         24 :         optimize(ins);
    1146                 :            : 
    1147                 :            : #ifdef RE2C_CGDEBUG
    1148                 :            :         std::cerr << std::endl << "AFTER OPT" << std::endl;
    1149                 :            :         for (const Ins *inst = &ins[0]; inst <= &ins[re->size]; ) {
    1150                 :            :                 inst = showIns(std::cout, *inst, ins[0]);
    1151                 :            :         }
    1152                 :            : #endif
    1153                 :            : 
    1154         [ +  + ]:        433 :         for (j = 0; j < re->size;)
    1155                 :            :         {
    1156                 :        409 :                 unmark(&ins[j]);
    1157                 :            : 
    1158         [ +  + ]:        409 :                 if (ins[j].i.tag == CHAR)
    1159                 :            :                 {
    1160                 :        163 :                         j = (Ins*) ins[j].i.link - ins;
    1161                 :            :                 }
    1162                 :            :                 else
    1163                 :            :                 {
    1164                 :        246 :                         j++;
    1165                 :            :                 }
    1166                 :            :         }
    1167                 :            : 
    1168 [ +  - ][ +  - ]:         24 :         return new DFA(ins, re->size, 0, nRealChars, rep);
    1169                 :            : }
    1170                 :            : 
    1171 [ +  - ][ +  - ]:       7242 : } // end namespace re2c
    1172                 :            :

Generated by: LCOV version 1.9