URI:
       tcomp.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tcomp.c (4240B)
       ---
            1 #include        "grep.h"
            2 
            3 /*
            4  * incremental compiler.
            5  * add the branch c to the
            6  * state s.
            7  */
            8 void
            9 increment(State *s, int c)
           10 {
           11         int i;
           12         State *t, **tt;
           13         Re *re1, *re2;
           14 
           15         nfollow = 0;
           16         gen++;
           17         matched = 0;
           18         for(i=0; i<s->count; i++)
           19                 fol1(s->re[i], c);
           20         qsort(follow, nfollow, sizeof(*follow), fcmp);
           21         for(tt=&state0; t = *tt;) {
           22                 if(t->count > nfollow) {
           23                         tt = &t->linkleft;
           24                         goto cont;
           25                 }
           26                 if(t->count < nfollow) {
           27                         tt = &t->linkright;
           28                         goto cont;
           29                 }
           30                 for(i=0; i<nfollow; i++) {
           31                         re1 = t->re[i];
           32                         re2 = follow[i];
           33                         if(re1 > re2) {
           34                                 tt = &t->linkleft;
           35                                 goto cont;
           36                         }
           37                         if(re1 < re2) {
           38                                 tt = &t->linkright;
           39                                 goto cont;
           40                         }
           41                 }
           42                 if(!!matched && !t->match) {
           43                         tt = &t->linkleft;
           44                         goto cont;
           45                 }
           46                 if(!matched && !!t->match) {
           47                         tt = &t->linkright;
           48                         goto cont;
           49                 }
           50                 s->next[c] = t;
           51                 return;
           52         cont:;
           53         }
           54 
           55         t = sal(nfollow);
           56         *tt = t;
           57         for(i=0; i<nfollow; i++) {
           58                 re1 = follow[i];
           59                 t->re[i] = re1;
           60         }
           61         s->next[c] = t;
           62         t->match = matched;
           63 }
           64 
           65 int
           66 fcmp(const void *va, const void *vb)
           67 {
           68         Re **aa, **bb;
           69         Re *a, *b;
           70 
           71         aa = (Re**)va;
           72         bb = (Re**)vb;
           73         a = *aa;
           74         b = *bb;
           75         if(a > b)
           76                 return 1;
           77         if(a < b)
           78                 return -1;
           79         return 0;
           80 }
           81 
           82 void
           83 fol1(Re *r, int c)
           84 {
           85         Re *r1;
           86 
           87 loop:
           88         if(r->gen == gen)
           89                 return;
           90         if(nfollow >= maxfollow)
           91                 error("nfollow");
           92         r->gen = gen;
           93         switch(r->type) {
           94         default:
           95                 error("fol1");
           96 
           97         case Tcase:
           98                 if(c >= 0 && c < 256)
           99                 if(r1 = r->u.cases[c])
          100                         follow[nfollow++] = r1;
          101                 if(r = r->next)
          102                         goto loop;
          103                 break;
          104 
          105         case Talt:
          106         case Tor:
          107                 fol1(r->u.alt, c);
          108                 r = r->next;
          109                 goto loop;
          110 
          111         case Tbegin:
          112                 if(c == '\n' || c == Cbegin)
          113                         follow[nfollow++] = r->next;
          114                 break;
          115 
          116         case Tend:
          117                 if(c == '\n') {
          118                         if(r->next == 0) {
          119                                 matched = 1;
          120                                 break;
          121                         }
          122                         r = r->next;
          123                         goto loop;
          124                 }
          125                 break;
          126 
          127         case Tclass:
          128                 if(c >= r->u.x.lo && c <= r->u.x.hi)
          129                         follow[nfollow++] = r->next;
          130                 break;
          131         }
          132 }
          133 
          134 Rune        tab1[] =
          135 {
          136         0x007f,
          137         0x07ff
          138 };
          139 Rune        tab2[] =
          140 {
          141         0x003f,
          142         0x0fff
          143 };
          144 
          145 Re2
          146 rclass(Rune p0, Rune p1)
          147 {
          148         char xc0[6], xc1[6];
          149         int i, n, m;
          150         Re2 x;
          151 
          152         if(p0 > p1)
          153                 return re2char(0xff, 0xff);        /* no match */
          154 
          155         /*
          156          * bust range into same length
          157          * character sequences
          158          */
          159         for(i=0; i<nelem(tab1); i++) {
          160                 m = tab1[i];
          161                 if(p0 <= m && p1 > m)
          162                         return re2or(rclass(p0, m), rclass(m+1, p1));
          163         }
          164 
          165         /*
          166          * bust range into part of a single page
          167          * or into full pages
          168          */
          169         for(i=0; i<nelem(tab2); i++) {
          170                 m = tab2[i];
          171                 if((p0 & ~m) != (p1 & ~m)) {
          172                         if((p0 & m) != 0)
          173                                 return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
          174                         if((p1 & m) != m)
          175                                 return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
          176                 }
          177         }
          178 
          179         n = runetochar(xc0, &p0);
          180         i = runetochar(xc1, &p1);
          181         if(i != n)
          182                 error("length");
          183 
          184         x = re2char(xc0[0], xc1[0]);
          185         for(i=1; i<n; i++)
          186                 x = re2cat(x, re2char(xc0[i], xc1[i]));
          187         return x;
          188 }
          189 
          190 int
          191 pcmp(const void *va, const void *vb)
          192 {
          193         int n;
          194         Rune *a, *b;
          195 
          196         a = (Rune*)va;
          197         b = (Rune*)vb;
          198 
          199         n = a[0] - b[0];
          200         if(n)
          201                 return n;
          202         return a[1] - b[1];
          203 }
          204 
          205 /*
          206  * convert character chass into
          207  * run-pair ranges of matches.
          208  * this is 10646/utf specific and
          209  * needs to be changed for some
          210  * other input character set.
          211  * this is the key to a fast
          212  * regular search of characters
          213  * by looking at sequential bytes.
          214  */
          215 Re2
          216 re2class(char *s)
          217 {
          218         Rune pairs[200], *p, *q, ov;
          219         int nc;
          220         Re2 x;
          221 
          222         nc = 0;
          223         if(*s == '^') {
          224                 nc = 1;
          225                 s++;
          226         }
          227 
          228         p = pairs;
          229         s += chartorune(p, s);
          230         for(;;) {
          231                 if(*p == '\\')
          232                         s += chartorune(p, s);
          233                 if(*p == 0)
          234                         break;
          235                 p[1] = *p;
          236                 p += 2;
          237                 s += chartorune(p, s);
          238                 if(*p != '-')
          239                         continue;
          240                 s += chartorune(p, s);
          241                 if(*p == '\\')
          242                         s += chartorune(p, s);
          243                 if(*p == 0)
          244                         break;
          245                 p[-1] = *p;
          246                 s += chartorune(p, s);
          247         }
          248         *p = 0;
          249         qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);
          250 
          251         q = pairs;
          252         for(p=pairs+2; *p; p+=2) {
          253                 if(p[0] > p[1])
          254                         continue;
          255                 if(p[0] > q[1] || p[1] < q[0]) {
          256                         q[2] = p[0];
          257                         q[3] = p[1];
          258                         q += 2;
          259                         continue;
          260                 }
          261                 if(p[0] < q[0])
          262                         q[0] = p[0];
          263                 if(p[1] > q[1])
          264                         q[1] = p[1];
          265         }
          266         q[2] = 0;
          267 
          268         p = pairs;
          269         if(nc) {
          270                 x = rclass(0, p[0]-1);
          271                 ov = p[1]+1;
          272                 for(p+=2; *p; p+=2) {
          273                         x = re2or(x, rclass(ov, p[0]-1));
          274                         ov = p[1]+1;
          275                 }
          276                 x = re2or(x, rclass(ov, 0xffff));
          277         } else {
          278                 x = rclass(p[0], p[1]);
          279                 for(p+=2; *p; p+=2)
          280                         x = re2or(x, rclass(p[0], p[1]));
          281         }
          282         return x;
          283 }