URI:
       tsprog.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tsprog.c (23115B)
       ---
            1 #include <u.h>
            2 #include <libc.h>
            3 #include <bio.h>
            4 #include <ctype.h>
            5 #include "code.h"
            6 
            7 /* fig leaves for possibly signed char quantities */
            8 #define ISUPPER(c)        isupper((c)&0xff)
            9 #define ISLOWER(c)        islower((c)&0xff)
           10 #define        ISALPHA(c)        isalpha((c)&0xff)
           11 #define        ISDIGIT(c)        isdigit((c)&0xff)
           12 #define ISVOWEL(c)        voweltab[(c)&0xff]
           13 #define Tolower(c)        (ISUPPER(c)? (c)-'A'+'a': (c))
           14 #define pair(a,b)        (((a)<<8) | (b))
           15 #define DLEV                2
           16 #define DSIZ                40
           17 
           18 typedef        long        Bits;
           19 #define        Set(h, f)        ((long)(h) & (f))
           20 
           21 Bits         nop(char*, char*, char*, int, int);
           22 Bits         strip(char*, char*, char*, int, int);
           23 Bits         ize(char*, char*, char*, int, int);
           24 Bits         i_to_y(char*, char*, char*, int, int);
           25 Bits         ily(char*, char*, char*, int, int);
           26 Bits         subst(char*, char*, char*, int, int);
           27 Bits         CCe(char*, char*, char*, int, int);
           28 Bits         tion(char*, char*, char*, int, int);
           29 Bits         an(char*, char*, char*, int, int);
           30 Bits         s(char*, char*, char*, int, int);
           31 Bits         es(char*, char*, char*, int, int);
           32 Bits         bility(char*, char*, char*, int, int);
           33 Bits         y_to_e(char*, char*, char*, int, int);
           34 Bits         VCe(char*, char*, char*, int, int);
           35 
           36 Bits         trypref(char*, char*, int, int);
           37 Bits        tryword(char*, char*, int, int);
           38 Bits         trysuff(char*, int, int);
           39 Bits        dict(char*, char*);
           40 void        typeprint(Bits);
           41 void        pcomma(char*);
           42 
           43 void        ise(void);
           44 int        ordinal(void);
           45 char*        skipv(char*);
           46 int        inun(char*, Bits);
           47 char*        ztos(char*);
           48 void        readdict(char*);
           49 
           50 typedef        struct        Ptab        Ptab;
           51 struct        Ptab
           52 {
           53         char*        s;
           54         int        flag;
           55 };
           56 
           57 typedef        struct        Suftab        Suftab;
           58 struct        Suftab
           59 {
           60         char        *suf;
           61         Bits        (*p1)(char*, char*, char*, int, int);
           62         int        n1;
           63         char        *d1;
           64         char        *a1;
           65         int        flag;
           66         int        affixable;
           67         Bits        (*p2)(char*, char*, char*, int, int);
           68         int        n2;
           69         char        *d2;
           70         char        *a2;
           71 };
           72 
           73 Suftab        staba[] = {
           74         {"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
           75         0
           76 };
           77 
           78 Suftab        stabc[] =
           79 {
           80         {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
           81         {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
           82         {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
           83         {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
           84         {"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
           85         {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
           86         {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
           87         {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
           88         {"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
           89         0
           90 };
           91 Suftab        stabd[] =
           92 {
           93         {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
           94         {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
           95         0
           96 };
           97 Suftab        stabe[] =
           98 {
           99         /*
          100          * V_affix for comment ->commence->commentment??
          101          */
          102         {"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
          103         {"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
          104         {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
          105         {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
          106         {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
          107         {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
          108         {"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
          109         0
          110 };
          111 Suftab        stabg[] =
          112 {
          113         {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
          114         {"gnikam",strip,6,"","+making",NOUN,NOUN},
          115         {"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
          116         {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
          117         0
          118 };
          119 Suftab        stabl[] =
          120 {
          121         {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
          122         {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
          123         {"latnem",strip,2,"","+al",N_AFFIX,ADJ},
          124         {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
          125         {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
          126         0
          127 };
          128 Suftab        stabm[] =
          129 {
          130                 /* congregational + ism */
          131         {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
          132         {"margo",subst,-1,"-ph+m","",NOUN,NOUN},
          133         0
          134 };
          135 Suftab        stabn[] =
          136 {
          137         {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
          138         {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
          139         {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
          140         {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
          141         {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
          142         {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
          143         {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
          144         {"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
          145         {"nem",strip,3,"","+man",MAN,PROP_COLLECT},
          146         {"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
          147         0
          148 };
          149 Suftab        stabp[] =
          150 {
          151         {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
          152         0
          153 };
          154 Suftab        stabr[] =
          155 {
          156         {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
          157         {"reyhparg",nop,0,"","",0,NOUN},
          158         {"reyl",nop,0,"","",0,NOUN},
          159         {"rekam",strip,5,"","+maker",NOUN,NOUN},
          160         {"repeek",strip,6,"","+keeper",NOUN,NOUN},
          161         {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ,        i_to_y,2,"-y+ier","+er"},
          162         {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
          163         {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
          164         {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
          165         0
          166 };
          167 Suftab        stabs[] =
          168 {
          169         {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
          170         {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
          171         {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH ,        es,2,"-y+ies","+es"},
          172         {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
          173         {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH  },
          174         0
          175 };
          176 Suftab        stabt[] =
          177 {
          178         {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
          179         {"tse",strip,2,"","+st",EST,DONT_TOUCH,        i_to_y,3,"-y+iest","+est" },
          180         {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
          181         {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
          182         0
          183 };
          184 Suftab        staby[] =
          185 {
          186         {"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
          187         {"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
          188         {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
          189         {"ytisuo",nop,0,"","",NOUN},
          190         {"ytilb",nop,0,"","",0,NOUN},
          191         {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
          192         {"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
          193         {"ylc",nop,0,"","",0},
          194         {"ylelb",nop,0,"","",0},
          195         {"ylelp",nop,0,"","",0},
          196         {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
          197         {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
          198         {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
          199         0
          200 };
          201 Suftab        stabz[] =
          202 {
          203         0
          204 };
          205 Suftab*        suftab[] =
          206 {
          207         staba,
          208         stabz,
          209         stabc,
          210         stabd,
          211         stabe,
          212         stabz,
          213         stabg,
          214         stabz,
          215         stabz,
          216         stabz,
          217         stabz,
          218         stabl,
          219         stabm,
          220         stabn,
          221         stabz,
          222         stabp,
          223         stabz,
          224         stabr,
          225         stabs,
          226         stabt,
          227         stabz,
          228         stabz,
          229         stabz,
          230         stabz,
          231         staby,
          232         stabz
          233 };
          234 
          235 Ptab        ptaba[] =
          236 {
          237         "anti", 0,
          238         "auto", 0,
          239         0
          240 };
          241 Ptab        ptabb[] =
          242 {
          243         "bio", 0,
          244         0
          245 };
          246 Ptab        ptabc[] =
          247 {
          248         "counter", 0,
          249         0
          250 };
          251 Ptab        ptabd[] =
          252 {
          253         "dis", 0,
          254         0
          255 };
          256 Ptab        ptabe[] =
          257 {
          258         "electro", 0,
          259         0
          260 };
          261 Ptab        ptabf[] =
          262 {
          263         "femto", 0,
          264         0
          265 };
          266 Ptab        ptabg[] =
          267 {
          268         "geo", 0,
          269         "giga", 0,
          270         0
          271 };
          272 Ptab        ptabh[] =
          273 {
          274         "hyper", 0,
          275         0
          276 };
          277 Ptab        ptabi[] =
          278 {
          279         "immuno", 0,
          280         "im", IN,
          281         "intra", 0,
          282         "inter", 0,
          283         "in", IN,
          284         "ir", IN,
          285         "iso", 0,
          286         0
          287 };
          288 Ptab        ptabj[] =
          289 {
          290         0
          291 };
          292 Ptab        ptabk[] =
          293 {
          294         "kilo", 0,
          295         0
          296 };
          297 Ptab        ptabl[] =
          298 {
          299         0
          300 };
          301 Ptab        ptabm[] =
          302 {
          303         "magneto", 0,
          304         "mega", 0,
          305         "meta", 0,
          306         "micro", 0,
          307         "mid", 0,
          308         "milli", 0,
          309         "mini", 0,
          310         "mis", 0,
          311         "mono", 0,
          312         "multi", 0,
          313         0
          314 };
          315 Ptab        ptabn[] =
          316 {
          317         "nano", 0,
          318         "neuro", 0,
          319         "non", 0,
          320         0
          321 };
          322 Ptab        ptabo[] =
          323 {
          324         "out", 0,
          325         "over", 0,
          326         0
          327 };
          328 Ptab        ptabp[] =
          329 {
          330         "para", 0,
          331         "photo", 0,
          332         "pico", 0,
          333         "poly", 0,
          334         "pre", 0,
          335         "pseudo", 0,
          336         "psycho", 0,
          337         0
          338 };
          339 Ptab        ptabq[] =
          340 {
          341         "quasi", 0,
          342         0
          343 };
          344 Ptab        ptabr[] =
          345 {
          346         "radio", 0,
          347         "re", 0,
          348         0
          349 };
          350 Ptab        ptabs[] =
          351 {
          352         "semi", 0,
          353         "stereo", 0,
          354         "sub", 0,
          355         "super", 0,
          356         0
          357 };
          358 Ptab        ptabt[] =
          359 {
          360         "tele", 0,
          361         "tera", 0,
          362         "thermo", 0,
          363         0
          364 };
          365 Ptab        ptabu[] =
          366 {
          367         "ultra", 0,
          368         "under", 0,        /*must precede un*/
          369         "un", IN,
          370         0
          371 };
          372 Ptab        ptabv[] =
          373 {
          374         0
          375 };
          376 Ptab        ptabw[] =
          377 {
          378         0
          379 };
          380 Ptab        ptabx[] =
          381 {
          382         0
          383 };
          384 Ptab        ptaby[] =
          385 {
          386         0
          387 };
          388 Ptab        ptabz[] =
          389 {
          390         0
          391 };
          392 
          393 Ptab*        preftab[] =
          394 {
          395         ptaba,
          396         ptabb,
          397         ptabc,
          398         ptabd,
          399         ptabe,
          400         ptabf,
          401         ptabg,
          402         ptabh,
          403         ptabi,
          404         ptabj,
          405         ptabk,
          406         ptabl,
          407         ptabm,
          408         ptabn,
          409         ptabo,
          410         ptabp,
          411         ptabq,
          412         ptabr,
          413         ptabs,
          414         ptabt,
          415         ptabu,
          416         ptabv,
          417         ptabw,
          418         ptabx,
          419         ptaby,
          420         ptabz
          421 };
          422 
          423 typedef struct {
          424         char *mesg;
          425         enum { NONE, SUFF, PREF} type;
          426 } Deriv;
          427 
          428 int        aflag;
          429 int        cflag;
          430 int        fflag;
          431 int        vflag;
          432 int        xflag;
          433 int         nflag;
          434 char        word[500];
          435 char*        original;
          436 Deriv        emptyderiv;
          437 Deriv        deriv[DSIZ+3];
          438 char        affix[DSIZ*10];        /* 10 is longest affix message */
          439 int        prefcount;
          440 int         suffcount;
          441 char*        acmeid;
          442 char        space[300000];        /* must be as large as "words"+"space" in pcode run */
          443 Bits        encode[2048];        /* must be as long as "codes" in pcode run */
          444 int        nencode;
          445 char        voweltab[256];
          446 char*        spacep[128*128+1];        /* pointer to words starting with 'xx' */
          447 Biobuf        bin;
          448 Biobuf        bout;
          449 
          450 char*        codefile = "#9/lib/amspell";
          451 char*        brfile = "#9/lib/brspell";
          452 char*        Usage = "usage";
          453 
          454 void
          455 main(int argc, char *argv[])
          456 {
          457         char *ep, *cp;
          458         char *dp;
          459         int j, i, c;
          460         int low;
          461         Bits h;
          462 
          463         codefile = unsharp(codefile);
          464         brfile = unsharp(brfile);
          465 
          466         Binit(&bin, 0, OREAD);
          467         Binit(&bout, 1, OWRITE);
          468         for(i=0; c = "aeiouyAEIOUY"[i]; i++)
          469                 voweltab[c] = 1;
          470         while(argc > 1) {
          471                 if(argv[1][0] != '-')
          472                         break;
          473                 for(i=1; c = argv[1][i]; i++)
          474                 switch(c) {
          475                 default:
          476                         fprint(2, "usage: spell [-bcCvx] [-f file]\n");
          477                         exits(Usage);
          478 
          479                 case 'a':
          480                         aflag++;
          481                         continue;
          482 
          483                 case 'b':
          484                         ise();
          485                         if(!fflag)
          486                                 codefile = brfile;
          487                         continue;
          488 
          489                 case 'C':                /* for "correct" */
          490                         vflag++;
          491                 case 'c':                /* for ocr */
          492                         cflag++;
          493                         continue;
          494 
          495                 case 'v':
          496                         vflag++;
          497                         continue;
          498 
          499                 case 'x':
          500                         xflag++;
          501                         continue;
          502 
          503                 case 'f':
          504                         if(argc <= 2) {
          505                                 fprint(2, "spell: -f requires another argument\n");
          506                                 exits(Usage);
          507                         }
          508                         argv++;
          509                         argc--;
          510                         codefile = argv[1];
          511                         fflag++;
          512                         goto brk;
          513                 }
          514         brk:
          515                 argv++;
          516                 argc--;
          517         }
          518         readdict(codefile);
          519         if(argc > 1) {
          520                 fprint(2, "usage: spell [-bcCvx] [-f file]\n");
          521                 exits(Usage);
          522         }
          523         if(aflag)
          524                 cflag = vflag = 0;
          525 
          526         for(;;) {
          527                 affix[0] = 0;
          528                 original = Brdline(&bin, '\n');
          529                 if(original == 0)
          530                         exits(0);
          531                 original[Blinelen(&bin)-1] = 0;
          532                 low = 0;
          533 
          534                 if(aflag) {
          535                         acmeid = original;
          536                         while(*original != ':')
          537                                 if(*original++ == 0)
          538                                         exits(0);
          539                         while(*++original != ':')
          540                                 if(*original == 0)
          541                                         exits(0);
          542                         *original++ = 0;
          543                 }
          544                 for(ep=word,dp=original; j = *dp; ep++,dp++) {
          545                         if(ISLOWER(j))
          546                                 low++;
          547                         if(ep >= word+sizeof(word)-1)
          548                                 break;
          549                         *ep = j;
          550                 }
          551                 *ep = 0;
          552 
          553                 if(ISDIGIT(word[0]) && ordinal())
          554                         continue;
          555 
          556                 h = 0;
          557                 if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
          558                         for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
          559                                 *dp = Tolower(*cp);
          560                 if(!h)
          561                 for(;;) {        /* at most twice */
          562                         if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
          563                                 break;
          564                         if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
          565                                 break;
          566                         if(!ISUPPER(word[0]))
          567                                 break;
          568                         cp = original;
          569                         dp = word;
          570                         while(*dp = *cp++) {
          571                                         if(!low)
          572                                                 *dp = Tolower(*dp);
          573                                 dp++;
          574                         }
          575                         word[0] = Tolower(word[0]);
          576                 }
          577 
          578                 if(cflag) {
          579                         if(!h || Set(h,STOP))
          580                                 print("-");
          581                         else if(!vflag)
          582                                 print("+");
          583                         else
          584                                 print("%c",'0' + (suffcount>0) +
          585                                    (prefcount>4? 8: 2*prefcount));
          586                 } else if(!h || Set(h,STOP)) {
          587                         if(aflag)
          588                                 Bprint(&bout, "%s:%s\n", acmeid, original);
          589                         else
          590                                 Bprint(&bout, "%s\n", original);
          591                 } else if(affix[0] != 0 && affix[0] != '.')
          592                         print("%s\t%s\n", affix, original);
          593         }
          594 }
          595 
          596 /*        strip exactly one suffix and do
          597  *        indicated routine(s), which may recursively
          598  *        strip suffixes
          599  */
          600 Bits
          601 trysuff(char* ep, int lev, int flag)
          602 {
          603         Suftab *t;
          604         char *cp, *sp;
          605         Bits h = 0;
          606         int initchar = ep[-1];
          607 
          608         flag &= ~MONO;
          609         lev += DLEV;
          610         if(lev < DSIZ) {
          611                 deriv[lev]  = emptyderiv;
          612                 deriv[lev-1] = emptyderiv;
          613         }
          614         if(!ISLOWER(initchar))
          615                 return h;
          616         for(t=suftab[initchar-'a']; sp=t->suf; t++) {
          617                 cp = ep;
          618                 while(*sp)
          619                         if(*--cp != *sp++)
          620                                 goto next;
          621                 for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
          622                         ;
          623                 if(sp < word)
          624                         continue;
          625                 if(!(t->affixable & flag))
          626                         return 0;
          627                 h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
          628                 if(!h && t->p2!=0) {
          629                         if(lev < DSIZ) {
          630                                 deriv[lev] = emptyderiv;
          631                                 deriv[lev+1] = emptyderiv;
          632                         }
          633                         h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
          634                 }
          635                 break;
          636         next:;
          637         }
          638         return h;
          639 }
          640 
          641 Bits
          642 nop(char* ep, char* d, char* a, int lev, int flag)
          643 {
          644         USED(ep);
          645         USED(d);
          646         USED(a);
          647         USED(lev);
          648         USED(flag);
          649         return 0;
          650 }
          651 
          652 Bits
          653 cstrip(char* ep, char* d, char* a, int lev, int flag)
          654 {
          655         int temp = ep[0];
          656 
          657         if(ISVOWEL(temp) && ISVOWEL(ep[-1])) {
          658                 switch(pair(ep[-1],ep[0])) {
          659                 case pair('a', 'a'):
          660                 case pair('a', 'e'):
          661                 case pair('a', 'i'):
          662                 case pair('e', 'a'):
          663                 case pair('e', 'e'):
          664                 case pair('e', 'i'):
          665                 case pair('i', 'i'):
          666                 case pair('o', 'a'):
          667                         return 0;
          668                 }
          669         } else
          670         if(temp==ep[-1]&&temp==ep[-2])
          671                 return 0;
          672         return strip(ep,d,a,lev,flag);
          673 }
          674 
          675 Bits
          676 strip(char* ep, char* d, char* a, int lev, int flag)
          677 {
          678         Bits h = trypref(ep, a, lev, flag);
          679 
          680         USED(d);
          681         if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2]))
          682                 h = 0;
          683         if(h)
          684                 return h;
          685         if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) {
          686                 h = trypref(ep-1,a,lev,flag|MONO);
          687                 if(h)
          688                         return h;
          689         }
          690         return trysuff(ep,lev,flag);
          691 }
          692 
          693 Bits
          694 s(char* ep, char* d, char* a, int lev, int flag)
          695 {
          696         if(lev > DLEV+1)
          697                 return 0;
          698         if(*ep=='s') {
          699                 switch(ep[-1]) {
          700                 case 'y':
          701                         if(ISVOWEL(ep[-2])||ISUPPER(*word))
          702                                 break;        /*says Kennedys*/
          703                 case 'x':
          704                 case 'z':
          705                 case 's':
          706                         return 0;
          707                 case 'h':
          708                         switch(ep[-2]) {
          709                         case 'c':
          710                         case 's':
          711                                 return 0;
          712                         }
          713                 }
          714         }
          715         return strip(ep,d,a,lev,flag);
          716 }
          717 
          718 Bits
          719 an(char* ep, char* d, char* a, int lev, int flag)
          720 {
          721         USED(d);
          722         if(!ISUPPER(*word))        /*must be proper name*/
          723                 return 0;
          724         return trypref(ep,a,lev,flag);
          725 }
          726 
          727 Bits
          728 ize(char* ep, char* d, char* a, int lev, int flag)
          729 {
          730         int temp = ep[-1];
          731         Bits h;
          732 
          733         USED(a);
          734         ep[-1] = 'e';
          735         h = strip(ep,"",d,lev,flag);
          736         ep[-1] = temp;
          737         return h;
          738 }
          739 
          740 Bits
          741 y_to_e(char* ep, char* d, char* a, int lev, int flag)
          742 {
          743         Bits h;
          744         int  temp;
          745 
          746         USED(a);
          747         switch(ep[-1]) {
          748         case 'a':
          749         case 'e':
          750         case 'i':
          751                 return 0;
          752         }
          753         temp = *ep;
          754         *ep++ = 'e';
          755         h = strip(ep,"",d,lev,flag);
          756         ep[-1] = temp;
          757         return h;
          758 }
          759 
          760 Bits
          761 ily(char* ep, char* d, char* a, int lev, int flag)
          762 {
          763         int temp = ep[0];
          764         char *cp = ep;
          765 
          766         if(temp==ep[-1]&&temp==ep[-2])                /* sillly */
          767                 return 0;
          768         if(*--cp=='y' && !ISVOWEL(*--cp))        /* happyly */
          769                 while(cp>word)
          770                         if(ISVOWEL(*--cp))        /* shyness */
          771                                 return 0;
          772         if(ep[-1]=='i')
          773                 return i_to_y(ep,d,a,lev,flag);
          774         return cstrip(ep,d,a,lev,flag);
          775 }
          776 
          777 Bits
          778 bility(char* ep, char* d, char* a, int lev, int flag)
          779 {
          780         *ep++ = 'l';
          781         return y_to_e(ep,d,a,lev,flag);
          782 }
          783 
          784 Bits
          785 i_to_y(char* ep, char* d, char* a, int lev, int flag)
          786 {
          787         Bits h;
          788         int temp;
          789 
          790         if(ISUPPER(*word))
          791                 return 0;
          792         if((temp=ep[-1])=='i' && !ISVOWEL(ep[-2])) {
          793                 ep[-1] = 'y';
          794                 a = d;
          795         }
          796         h = cstrip(ep,"",a,lev,flag);
          797         ep[-1] = temp;
          798         return h;
          799 }
          800 
          801 Bits
          802 es(char* ep, char* d, char* a, int lev, int flag)
          803 {
          804         if(lev>DLEV)
          805                 return 0;
          806         switch(ep[-1]) {
          807         default:
          808                 return 0;
          809         case 'i':
          810                 return i_to_y(ep,d,a,lev,flag);
          811         case 'h':
          812                 switch(ep[-2]) {
          813                 default:
          814                         return 0;
          815                 case 'c':
          816                 case 's':
          817                         break;
          818                 }
          819         case 's':
          820         case 'z':
          821         case 'x':
          822                 return strip(ep,d,a,lev,flag);
          823         }
          824 }
          825 
          826 Bits
          827 subst(char* ep, char* d, char* a, int lev, int flag)
          828 {
          829         char *u,*t;
          830         Bits h;
          831 
          832         USED(a);
          833         if(skipv(skipv(ep-1)) < word)
          834                 return 0;
          835         for(t=d; *t!='+'; t++)
          836                 continue;
          837         for(u=ep; *--t!='-';)
          838                 *--u = *t;
          839         h = strip(ep,"",d,lev,flag);
          840         while(*++t != '+')
          841                 continue;
          842         while(*++t)
          843                 *u++ = *t;
          844         return h;
          845 }
          846 
          847 Bits
          848 tion(char* ep, char* d, char* a, int lev, int flag)
          849 {
          850         switch(ep[-2]) {
          851         default:
          852                 return trypref(ep,a,lev,flag);
          853         case 'a':
          854         case 'e':
          855         case 'i':
          856         case 'o':
          857         case 'u':
          858                 return y_to_e(ep,d,a,lev,flag);
          859         }
          860 }
          861 
          862 /*
          863  * possible consonant-consonant-e ending
          864  */
          865 Bits
          866 CCe(char* ep, char* d, char* a, int lev, int flag)
          867 {
          868         Bits h;
          869 
          870         switch(ep[-1]) {
          871         case 'l':
          872                 if(ISVOWEL(ep[-2]))
          873                         break;
          874                 switch(ep[-2]) {
          875                 case 'l':
          876                 case 'r':
          877                 case 'w':
          878                         break;
          879                 default:
          880                         return y_to_e(ep,d,a,lev,flag);
          881                 }
          882                 break;
          883         case 'c':
          884         case 'g':
          885                 if(*ep == 'a')        /* prevent -able for -eable */
          886                         return 0;
          887         case 's':
          888         case 'v':
          889         case 'z':
          890                 if(ep[-2]==ep[-1])
          891                         break;
          892                 if(ISVOWEL(ep[-2]))
          893                         break;
          894         case 'u':
          895                 if(h = y_to_e(ep,d,a,lev,flag))
          896                         return h;
          897                 if(!(ep[-2]=='n' && ep[-1]=='g'))
          898                         return 0;
          899         }
          900         return VCe(ep,d,a,lev,flag);
          901 }
          902 
          903 /*
          904  * possible consonant-vowel-consonant-e ending
          905  */
          906 Bits
          907 VCe(char* ep, char* d, char* a, int lev, int flag)
          908 {
          909         int c;
          910         Bits h;
          911 
          912         c = ep[-1];
          913         if(c=='e')
          914                 return 0;
          915         if(!ISVOWEL(c) && ISVOWEL(ep[-2])) {
          916                 c = *ep;
          917                 *ep++ = 'e';
          918                 h = trypref(ep,d,lev,flag);
          919                 if(!h)
          920                         h = trysuff(ep,lev,flag);
          921                 if(h)
          922                         return h;
          923                 ep--;
          924                 *ep = c;
          925         }
          926         return cstrip(ep,d,a,lev,flag);
          927 }
          928 
          929 Ptab*
          930 lookuppref(uchar** wp, char* ep)
          931 {
          932         Ptab *sp;
          933         uchar *bp,*cp;
          934         unsigned int initchar = Tolower(**wp);
          935 
          936         if(!ISALPHA(initchar))
          937                 return 0;
          938         for(sp=preftab[initchar-'a'];sp->s;sp++) {
          939                 bp = *wp;
          940                 for(cp= (uchar*)sp->s;*cp; )
          941                         if(*bp++!=*cp++)
          942                                 goto next;
          943                 for(cp=bp;cp<(uchar*)ep;cp++)
          944                         if(ISVOWEL(*cp)) {
          945                                 *wp = bp;
          946                                 return sp;
          947                         }
          948         next:;
          949         }
          950         return 0;
          951 }
          952 
          953 /*        while word is not in dictionary try stripping
          954  *        prefixes. Fail if no more prefixes.
          955  */
          956 Bits
          957 trypref(char* ep, char* a, int lev, int flag)
          958 {
          959         Ptab *tp;
          960         char *bp, *cp;
          961         char *pp;
          962         Bits h;
          963         char space[20];
          964 
          965         if(lev<DSIZ) {
          966                 deriv[lev].mesg = a;
          967                 deriv[lev].type = *a=='.'? NONE: SUFF;
          968         }
          969         if(h = tryword(word,ep,lev,flag)) {
          970                 if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
          971                         return h;
          972                 h = 0;
          973         }
          974         bp = word;
          975         pp = space;
          976         if(lev<DSIZ) {
          977                 deriv[lev+1].mesg = pp;
          978                 deriv[lev+1].type = 0;
          979         }
          980         while(tp=lookuppref((uchar**)(void*)&bp,ep)) {
          981                 *pp++ = '+';
          982                 cp = tp->s;
          983                 while(pp<space+sizeof(space) && (*pp = *cp++))
          984                         pp++;
          985                 deriv[lev+1].type += PREF;
          986                 h = tryword(bp,ep,lev+1,flag);
          987                 if(Set(h,NOPREF) ||
          988                    ((tp->flag&IN) && inun(bp-2,h)==0)) {
          989                         h = 0;
          990                         break;
          991                 }
          992                 if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
          993                         break;
          994                 h = 0;
          995         }
          996         if(lev < DSIZ) {
          997                 deriv[lev+1] = emptyderiv;
          998                 deriv[lev+2] = emptyderiv;
          999         }
         1000         return h;
         1001 }
         1002 
         1003 Bits
         1004 tryword(char* bp, char* ep, int lev, int flag)
         1005 {
         1006         int  j;
         1007         Bits h = 0;
         1008         char duple[3];
         1009 
         1010         if(ep-bp <= 1)
         1011                 return h;
         1012         if(flag&MONO) {
         1013                 if(lev<DSIZ) {
         1014                         deriv[++lev].mesg = duple;
         1015                         deriv[lev].type = SUFF;
         1016                 }
         1017                 duple[0] = '+';
         1018                 duple[1] = *ep;
         1019                 duple[2] = 0;
         1020         }
         1021         h = dict(bp, ep);
         1022         if(vflag==0 || h==0)
         1023                 return h;
         1024         /*
         1025          * when derivations are wanted, collect them
         1026          * for printing
         1027          */
         1028         j = lev;
         1029         prefcount = suffcount = 0;
         1030         do {
         1031                 if(j<DSIZ && deriv[j].type) {
         1032                         strcat(affix, deriv[j].mesg);
         1033                         if(deriv[j].type == SUFF)
         1034                                 suffcount++;
         1035                         else if(deriv[j].type != NONE)
         1036                                 prefcount = deriv[j].type/PREF;
         1037                 }
         1038         } while(--j > 0);
         1039         return h;
         1040 }
         1041 
         1042 int
         1043 inun(char* bp, Bits h)
         1044 {
         1045         if(*bp == 'u')
         1046                 return Set(h, IN) == 0;
         1047         /* *bp == 'i' */
         1048         if(Set(h, IN) == 0)
         1049                 return 0;
         1050         switch(bp[2]) {
         1051         case 'r':
         1052                 return bp[1] == 'r';
         1053         case 'm':
         1054         case 'p':
         1055                 return bp[1] == 'm';
         1056         }
         1057         return bp[1] == 'n';
         1058 }
         1059 
         1060 char*
         1061 skipv(char *s)
         1062 {
         1063         if(s >= word && ISVOWEL(*s))
         1064                 s--;
         1065         while(s >= word && !ISVOWEL(*s))
         1066                 s--;
         1067         return s;
         1068 }
         1069 
         1070 /*
         1071  * crummy way to Britishise
         1072  */
         1073 void
         1074 ise(void)
         1075 {
         1076         Suftab *p;
         1077         int i;
         1078 
         1079         for(i=0; i<26; i++)
         1080                 for(p = suftab[i]; p->suf; p++) {
         1081                         p->suf = ztos(p->suf);
         1082                         p->d1 = ztos(p->d1);
         1083                         p->a1 = ztos(p->a1);
         1084                 }
         1085 }
         1086 
         /*
          * Return as with every 'z' replaced by 's'.
          * A string without 'z' is returned as is; otherwise the result is
          * a fresh strdup'ed copy and the argument is left unmodified.
          */
         char*
         ztos(char *as)
         {
                 char *copy, *p;

                 /* find the first 'z', if any */
                 for(p = as; *p != 'z'; p++)
                         if(*p == 0)
                                 return as;

                 copy = strdup(as);
                 /* nothing before the first 'z' needs changing */
                 for(p = copy + (p - as); *p; p++)
                         if(*p == 'z')
                                 *p = 's';
                 return copy;
         }
         1104 
         1105 Bits
         1106 dict(char* bp, char* ep)
         1107 {
         1108         char *cp, *cp1, *w, *wp, *we;
         1109         int n, f;
         1110 
         1111         w = bp;
         1112         we = ep;
         1113         n = ep-bp;
         1114         if(n <= 1)
         1115                 return NOUN;
         1116 
         1117         f = w[0] & 0x7f;
         1118         f *= 128;
         1119         f += w[1] & 0x7f;
         1120         bp = spacep[f];
         1121         ep = spacep[f+1];
         1122 
         1123 loop:
         1124         if(bp >= ep) {
         1125                 if(xflag)
         1126                         fprint(2, "=%.*s\n", utfnlen(w, n), w);
         1127                 return 0;
         1128         }
         1129         /*
         1130          * find the beginning of some word in the middle
         1131          */
         1132         cp = bp + (ep-bp)/2;
         1133 
         1134         while(cp > bp && !(*cp & 0x80))
         1135                 cp--;
         1136         while(cp > bp && (cp[-1] & 0x80))
         1137                 cp--;
         1138 
         1139         wp = w + 2;        /* skip two letters */
         1140         cp1 = cp + 2;        /* skip affix code */
         1141         for(;;) {
         1142                 if(wp >= we) {
         1143                         if(*cp1 & 0x80)
         1144                                 goto found;
         1145                         else
         1146                                 f = 1;
         1147                         break;
         1148                 }
         1149                 if(*cp1 & 0x80) {
         1150                         f = -1;
         1151                         break;
         1152                 }
         1153                 f = *cp1++ - *wp++;
         1154                 if(f != 0)
         1155                         break;
         1156         }
         1157 
         1158         if(f < 0) {
         1159                 while(!(*cp1 & 0x80))
         1160                         cp1++;
         1161                 bp = cp1;
         1162                 goto loop;
         1163         }
         1164         ep = cp;
         1165         goto loop;
         1166 
         1167 found:
         1168         f = ((cp[0] & 0x7) << 8) |
         1169                 (cp[1] & 0xff);
         1170         if(xflag) {
         1171                 fprint(2, "=%.*s ", utfnlen(w, n), w);
         1172                 typeprint(encode[f]);
         1173         }
         1174         return encode[f];
         1175 }
         1176 
         1177 void
         1178 typeprint(Bits h)
         1179 {
         1180 
         1181         pcomma("");
         1182         if(h & NOUN)
         1183                 pcomma("n");
         1184         if(h & PROP_COLLECT)
         1185                 pcomma("pc");
         1186         if(h & VERB) {
         1187                 if((h & VERB) == VERB)
         1188                         pcomma("v");
         1189                 else
         1190                 if((h & VERB) == V_IRREG)
         1191                         pcomma("vi");
         1192                 else
         1193                 if(h & ED)
         1194                         pcomma("ed");
         1195         }
         1196         if(h & ADJ)
         1197                 pcomma("a");
         1198         if(h & COMP) {
         1199                 if((h & COMP) == ACTOR)
         1200                         pcomma("er");
         1201                 else
         1202                         pcomma("comp");
         1203         }
         1204         if(h & DONT_TOUCH)
         1205                 pcomma("d");
         1206         if(h & N_AFFIX)
         1207                 pcomma("na");
         1208         if(h & ADV)
         1209                 pcomma("adv");
         1210         if(h & ION)
         1211                 pcomma("ion");
         1212         if(h & V_AFFIX)
         1213                 pcomma("va");
         1214         if(h & MAN)
         1215                 pcomma("man");
         1216         if(h & NOPREF)
         1217                 pcomma("nopref");
         1218         if(h & MONO)
         1219                 pcomma("ms");
         1220         if(h & IN)
         1221                 pcomma("in");
         1222         if(h & _Y)
         1223                 pcomma("y");
         1224         if(h & STOP)
         1225                 pcomma("s");
         1226         fprint(2, "\n");
         1227 }
         1228 
         /*
          * Print s on file descriptor 2 as the next item of a
          * comma-separated list.  An empty string prints nothing and resets
          * the list, so the following item appears without a leading comma.
          */
         void
         pcomma(char *s)
         {
                 static int started;

                 if(*s == 0) {
                         started = 0;
                         return;
                 }
                 fprint(2, started? ",%s": "%s", s);
                 started = 1;
         }
         1244 
         1245 /*
         1246  * is the word on of the following
         1247  *        12th        teen
         1248  *        21st        end in 1
         1249  *        23rd        end in 3
         1250  *        77th        default
         1251  * called knowing word[0] is a digit
         1252  */
         1253 int
         1254 ordinal(void)
         1255 {
         1256         char *cp = word;
         1257         static char sp[4];
         1258 
         1259         while(ISDIGIT(*cp))
         1260                 cp++;
         1261         strncpy(sp,cp,3);
         1262         if(ISUPPER(cp[0]) && ISUPPER(cp[1])) {
         1263                 sp[0] = Tolower(cp[0]);
         1264                 sp[1] = Tolower(cp[1]);
         1265         }
         1266         return 0 == strncmp(sp,
         1267                 cp[-2]=='1'? "th":        /* out of bounds if 1 digit */
         1268                 *--cp=='1'? "st":        /* harmless */
         1269                 *cp=='2'? "nd":
         1270                 *cp=='3'? "rd":
         1271                 "th", 3);
         1272 }
         1273 
         1274 /*
         1275  * read in the dictionary.
         1276  * format is
         1277  * {
         1278  *        short        nencode;
         1279  *        long        encode[nencode];
         1280  *        char        space[*];
         1281  * };
         1282  *
         1283  * the encodings are a table all different
         1284  * affixes.
         1285  * the dictionary proper has 2 bytes
         1286  * that demark and then the rest of the
         1287  * word. the 2 bytes have the following
         1288  *        0x80 0x00        flag
         1289  *        0x78 0x00        count of prefix bytes
         1290  *                        common with prev word
         1291  *        0x07 0xff        affix code
         1292  *
         1293  * all ints are big endians in the file.
         1294  */
         1295 void
         1296 readdict(char *file)
         1297 {
         1298         char *s, *is, *lasts, *ls;
         1299         int c, i, sp, p;
         1300         int f;
         1301         long l;
         1302 
         1303         lasts = 0;
         1304         f = open(file, 0);
         1305         if(f == -1) {
         1306                 fprint(2, "cannot open %s\n", file);
         1307                 exits("open");
         1308         }
         1309         if(read(f, space, 2) != 2)
         1310                 goto bad;
         1311         nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
         1312         if(read(f, space, 4*nencode) != 4*nencode)
         1313                 goto bad;
         1314         s = space;
         1315         for(i=0; i<nencode; i++) {
         1316                 l = (long)(s[0] & 0xff) << 24;
         1317                 l |= (s[1] & 0xff) << 16;
         1318                 l |= (s[2] & 0xff) << 8;
         1319                 l |= s[3] & 0xff;
         1320                 encode[i] = (Bits)l;
         1321                 s += 4;
         1322         }
         1323         l = read(f, space, sizeof(space));
         1324         if(l == sizeof(space))
         1325                 goto noroom;
         1326         is = space + (sizeof(space) - l);
         1327         memmove(is, space, l);
         1328 
         1329         s = space;
         1330         c = *is++ & 0xff;
         1331         sp = -1;
         1332         i = 0;
         1333 
         1334 loop:
         1335         if(s > is)
         1336                 goto noroom;
         1337         if(c < 0) {
         1338                 close(f);
         1339                 while(sp < 128*128)
         1340                         spacep[++sp] = s;
         1341                 *s = (char)0x80;                /* fence */
         1342                 return;
         1343         }
         1344         p = (c>>3) & 0xf;
         1345         *s++ = c;
         1346         *s++ = *is++ & 0xff;
         1347         if(p <= 0)
         1348                 i = (*is++ & 0xff)*128;
         1349         if(p <= 1) {
         1350                 if(!(*is & 0x80))
         1351                         i = i/128*128 + (*is++ & 0xff);
         1352                 if(i <= sp) {
         1353                         fprint(2, "the dict isnt sorted or \n");
         1354                         fprint(2, "memmove didn't work\n");
         1355                         goto bad;
         1356                 }
         1357                 while(sp < i)
         1358                         spacep[++sp] = s-2;
         1359         }
         1360         ls = lasts;
         1361         lasts = s;
         1362         for(p-=2; p>0; p--)
         1363                 *s++ = *ls++;
         1364         for(;;) {
         1365                 if(is >= space+sizeof(space)) {
         1366                         c = -1;
         1367                         break;
         1368                 }
         1369                 c = *is++ & 0xff;
         1370                 if(c & 0x80)
         1371                         break;
         1372                 *s++ = c;
         1373         }
         1374         *s = 0;
         1375         goto loop;
         1376 
         1377 bad:
         1378         fprint(2, "trouble reading %s\n", file);
         1379         exits("read");
         1380 noroom:
         1381         fprint(2, "not enough space for dictionary\n");
         1382         exits("space");
         1383 }