URI:
       toed.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       toed.c (36018B)
       ---
            1 #include <u.h>
            2 #include <libc.h>
            3 #include <bio.h>
            4 #include "dict.h"
            5 
            6 enum {        /* anonymous numeric constants; their users are not visible in this excerpt */
            7         Buflen=1000,        /* presumably a working-buffer length -- TODO confirm at use sites */
            8         Maxaux=5        /* presumably the max number of aux attributes per tag -- TODO confirm */
            9 };
           10 
           11 /* Possible tags; enumerators count up from A = 0, so order fixes each value */
           12 enum {
           13         A,                /* author in quote (small caps) */
           14         B,                /* bold */
           15         Ba,                /* author inside bib */
           16         Bch,                /* builtup chem component */
           17         Bib,                /* surrounds word 'in' for bibliographic ref */
           18         Bl,                /* bold */
           19         Bo,                /* bond over */
           20         Bu,                /* bond under */
           21         Cb,                /* ? block of stuff (indent) */
           22         Cf,                /* cross ref to another entry (italics) */
           23         Chem,                /* chemistry formula */
           24         Co,                /* over (preceding sum, integral, etc.) */
           25         Col,                /* column of table (aux just may be r) */
           26         Cu,                /* under (preceding sum, integral, etc.) */
           27         Dat,                /* date */
           28         Db,                /* def block? indent */
           29         Dn,                /* denominator of fraction */
           30         E,                /* main entry */
           31         Ed,                /* editor's comments (in [...]) */
           32         Etym,                /* etymology (in [...]) */
           33         Fq,                /* frequency count (superscript) */
           34         Form,                /* formula */
           35         Fr,                /* fraction (contains <nu>, then <dn>) */
           36         Gk,                /* greek (transliteration) */
           37         Gr,                /* grammar? (e.g., around 'pa.' in 'pa. pple.') */
           38         Hg,                /* headword group */
           39         Hm,                /* homonym (superscript) */
           40         Hw,                /* headword (bold) */
           41         I,                /* italics */
           42         Il,                /* italic list? */
           43         In,                /* inferior (subscript) */
           44         L,                /* row of col of table */
           45         La,                /* status or usage label (italic) */
           46         Lc,                /* chapter/verse sort of thing for works */
           47         N,                /* note (smaller type) */
           48         Nu,                /* numerator of fraction */
           49         Ov,                /* needs overline */
           50         P,                /* paragraph (indent) */
           51         Ph,                /* pronunciation (transliteration) */
           52         Pi,                /* pile (frac without line) */
           53         Pqp,                /* subblock of quote */
           54         Pr,                /* pronunciation (in (...)) */
           55         Ps,                /* position (e.g., adv.) (italic) */
           56         Pt,                /* part (in lc) */
           57         Q,                /* quote in quote block */
           58         Qd,                /* quote date (bold) */
           59         Qig,                /* quote number (greek) */
           60         Qla,                /* status or usage label in quote (italic) */
           61         Qp,                /* quote block (small type, indent) */
           62         Qsn,                /* quote number */
           63         Qt,                /* quote words */
           64         R,                /* roman type style */
           65         Rx,                /* relative cross reference (e.g., next) */
           66         S,                /* another form? (italic) */
           67         S0,                /* sense (sometimes surrounds several sx's) */
           68         S1,                /* sense (aux num: indented bold letter) */
           69         S2,                /* sense (aux num: indented bold capital rom num) */
           70         S3,                /* sense (aux num: indented number of asterisks) */
           71         S4,                /* sense (aux num: indented bold number) */
           72         S5,                /* sense (aux num: indented number of asterisks) */
           73         S6,                /* subsense (aux num: bold letter) */
           74         S7a,                /* subsense (aux num: letter) */
           75         S7n,                /* subsense (aux num: roman numeral) */
           76         Sc,                /* small caps */
           77         Sgk,                /* subsense (aux num: transliterated greek) */
           78         Sn,                /* sense of subdefinition (aux num: roman letter) */
           79         Ss,                /* sans serif */
           80         Ssb,                /* sans serif bold */
           81         Ssi,                /* sans serif italic */
           82         Su,                /* superior (superscript) */
           83         Sub,                /* subdefinition */
           84         Table,                /* table (aux cols=number of columns) */
           85         Tt,                /* title? (italics) */
           86         Vd,                /* numeric label for variant form */
           87         Ve,                /* variant entry */
           88         Vf,                /* variant form (light bold) */
           89         Vfl,                /* list of vf's (starts with Also or Forms) */
           90         W,                /* work (e.g., Beowulf) (italics) */
           91         X,                /* cross reference to main word (small caps) */
           92         Xd,                /* cross reference to quotation by date */
           93         Xi,                /* internal cross reference ? (italic) */
           94         Xid,                /* cross reference identifier, in quote ? */
           95         Xs,                /* cross reference sense (lower number) */
           96         Xr,                /* list of x's */
           97         Ntag                /* end of tags; as the last enumerator it equals the number of tags */
           98 };
           99 
          100 /* Assoc tables must be sorted on first field */
          101 
          102 static Assoc tagtab[] = {        /* tag name -> tag enum value; keep rows sorted by name */
          103         {"a",                A},
          104         {"b",                B},
          105         {"ba",                Ba},
          106         {"bch",                Bch},
          107         {"bib",                Bib},
          108         {"bl",                Bl},
          109         {"bo",                Bo},
          110         {"bu",                Bu},
          111         {"cb",                Cb},
          112         {"cf",                Cf},
          113         {"chem",        Chem},
          114         {"co",                Co},
          115         {"col",                Col},
          116         {"cu",                Cu},
          117         {"dat",                Dat},
          118         {"db",                Db},
          119         {"dn",                Dn},
          120         {"e",                E},
          121         {"ed",                Ed},
          122         {"et",                Etym},        /* second spelling of the same tag as "etym" */
          123         {"etym",        Etym},
          124         {"form",        Form},
          125         {"fq",                Fq},
          126         {"fr",                Fr},
          127         {"frac",        Fr},        /* second spelling of the same tag as "fr" */
          128         {"gk",                Gk},
          129         {"gr",                Gr},
          130         {"hg",                Hg},
          131         {"hm",                Hm},
          132         {"hw",                Hw},
          133         {"i",                I},
          134         {"il",                Il},
          135         {"in",                In},
          136         {"l",                L},
          137         {"la",                La},
          138         {"lc",                Lc},
          139         {"n",                N},
          140         {"nu",                Nu},
          141         {"ov",                Ov},
          142         {"p",                P},
          143         {"ph",                Ph},
          144         {"pi",                Pi},
          145         {"pqp",                Pqp},
          146         {"pr",                Pr},
          147         {"ps",                Ps},
          148         {"pt",                Pt},
          149         {"q",                Q},
          150         {"qd",                Qd},
          151         {"qig",                Qig},
          152         {"qla",                Qla},
          153         {"qp",                Qp},
          154         {"qsn",                Qsn},
          155         {"qt",                Qt},
          156         {"r",                R},
          157         {"rx",                Rx},
          158         {"s",                S},
          159         {"s0",                S0},
          160         {"s1",                S1},
          161         {"s2",                S2},
          162         {"s3",                S3},
          163         {"s4",                S4},
          164         {"s5",                S5},
          165         {"s6",                S6},
          166         {"s7a",                S7a},
          167         {"s7n",                S7n},
          168         {"sc",                Sc},
          169         {"sgk",                Sgk},
          170         {"sn",                Sn},
          171         {"ss",                Ss,},        /* NOTE(review): comma after Ss is legal C but inconsistent with the other rows */
          172         {"ssb",                Ssb},
          173         {"ssi",                Ssi},
          174         {"su",                Su},
          175         {"sub",                Sub},
          176         {"table",        Table},
          177         {"tt",                Tt},
          178         {"vd",                Vd},
          179         {"ve",                Ve},
          180         {"vf",                Vf},
          181         {"vfl",                Vfl},
          182         {"w",                W},
          183         {"x",                X},
          184         {"xd",                Xd},
          185         {"xi",                Xi},
          186         {"xid",                Xid},
          187         {"xr",                Xr},
          188         {"xs",                Xs}
          189 };
          190 
          191 /* Possible tag auxiliary info */
          192 enum {
          193         Cols,                /* number of columns in a table */
          194         Num,                /* letter or number, for a sense */
          195         St,                /* status (e.g., obs) */
          196         Naux                /* last enumerator, so it equals the number of aux kinds */
          197 };
          198 
          199 static Assoc auxtab[] = {        /* aux attribute name -> aux enum value; keep rows sorted by name */
          200         {"cols",        Cols},
          201         {"num",                Num},
          202         {"st",                St}
          203 };
          204 
          205 static Assoc spectab[] = {
          206         {"3on4",        0xbe},
          207         {"Aacu",        0xc1},
          208         {"Aang",        0xc5},
          209         {"Abarab",        0x100},
          210         {"Acirc",        0xc2},
          211         {"Ae",                0xc6},
          212         {"Agrave",        0xc0},
          213         {"Alpha",        0x391},
          214         {"Amac",        0x100},
          215         {"Asg",                0x1b7},                /* Unicyle. Cf "Sake" */
          216         {"Auml",        0xc4},
          217         {"Beta",        0x392},
          218         {"Cced",        0xc7},
          219         {"Chacek",        0x10c},
          220         {"Chi",                0x3a7},
          221         {"Chirho",        0x2627},                /* Chi Rho U+2627 */
          222         {"Csigma",        0x3da},
          223         {"Delta",        0x394},
          224         {"Eacu",        0xc9},
          225         {"Ecirc",        0xca},
          226         {"Edh",                0xd0},
          227         {"Epsilon",        0x395},
          228         {"Eta",                0x397},
          229         {"Gamma",        0x393},
          230         {"Iacu",        0xcd},
          231         {"Icirc",        0xce},
          232         {"Imac",        0x12a},
          233         {"Integ",        0x222b},
          234         {"Iota",        0x399},
          235         {"Kappa",        0x39a},
          236         {"Koppa",        0x3de},
          237         {"Lambda",        0x39b},
          238         {"Lbar",        0x141},
          239         {"Mu",                0x39c},
          240         {"Naira",        0x4e},                /* should have bar through */
          241         {"Nplus",        0x4e},                /* should have plus above */
          242         {"Ntilde",        0xd1},
          243         {"Nu",                0x39d},
          244         {"Oacu",        0xd3},
          245         {"Obar",        0xd8},
          246         {"Ocirc",        0xd4},
          247         {"Oe",                0x152},
          248         {"Omega",        0x3a9},
          249         {"Omicron",        0x39f},
          250         {"Ouml",        0xd6},
          251         {"Phi",                0x3a6},
          252         {"Pi",                0x3a0},
          253         {"Psi",                0x3a8},
          254         {"Rho",                0x3a1},
          255         {"Sacu",        0x15a},
          256         {"Sigma",        0x3a3},
          257         {"Summ",        0x2211},
          258         {"Tau",                0x3a4},
          259         {"Th",                0xde},
          260         {"Theta",        0x398},
          261         {"Tse",                0x426},
          262         {"Uacu",        0xda},
          263         {"Ucirc",        0xdb},
          264         {"Upsilon",        0x3a5},
          265         {"Uuml",        0xdc},
          266         {"Wyn",                0x1bf},                /* wynn U+01BF */
          267         {"Xi",                0x39e},
          268         {"Ygh",                0x1b7},                /* Yogh        U+01B7 */
          269         {"Zeta",        0x396},
          270         {"Zh",                0x1b7},                /* looks like Yogh. Cf "Sake" */
          271         {"a",                0x61},                /* ante */
          272         {"aacu",        0xe1},
          273         {"aang",        0xe5},
          274         {"aasper",        MAAS},
          275         {"abreve",        0x103},
          276         {"acirc",        0xe2},
          277         {"acu",                LACU},
          278         {"ae",                0xe6},
          279         {"agrave",        0xe0},
          280         {"ahook",        0x105},
          281         {"alenis",        MALN},
          282         {"alpha",        0x3b1},
          283         {"amac",        0x101},
          284         {"amp",                0x26},
          285         {"and",                MAND},
          286         {"ang",                LRNG},
          287         {"angle",        0x2220},
          288         {"ankh",        0x2625},                /* ankh U+2625 */
          289         {"ante",        0x61},                /* before (year) */
          290         {"aonq",        MAOQ},
          291         {"appreq",        0x2243},
          292         {"aquar",        0x2652},
          293         {"arDadfull",        0x636},                /* Dad U+0636 */
          294         {"arHa",        0x62d},                /* haa U+062D */
          295         {"arTa",        0x62a},                /* taa U+062A */
          296         {"arain",        0x639},                /* ain U+0639 */
          297         {"arainfull",        0x639},                /* ain U+0639 */
          298         {"aralif",        0x627},                /* alef U+0627 */
          299         {"arba",        0x628},                /* baa U+0628 */
          300         {"arha",        0x647},                /* ha U+0647 */
          301         {"aries",        0x2648},
          302         {"arnun",        0x646},                /* noon U+0646 */
          303         {"arnunfull",        0x646},                /* noon U+0646 */
          304         {"arpa",        0x647},                /* ha U+0647 */
          305         {"arqoph",        0x642},                /* qaf U+0642 */
          306         {"arshinfull",        0x634},                /* sheen U+0634 */
          307         {"arta",        0x62a},                /* taa U+062A */
          308         {"artafull",        0x62a},                /* taa U+062A */
          309         {"artha",        0x62b},                /* thaa U+062B */
          310         {"arwaw",        0x648},                /* waw U+0648 */
          311         {"arya",        0x64a},                /* ya U+064A */
          312         {"aryafull",        0x64a},                /* ya U+064A */
          313         {"arzero",        0x660},                /* indic zero U+0660 */
          314         {"asg",                0x292},                /* unicycle character. Cf "hallow" */
          315         {"asper",        LASP},
          316         {"assert",        0x22a2},
          317         {"astm",        0x2042},                /* asterism: should be upside down */
          318         {"at",                0x40},
          319         {"atilde",        0xe3},
          320         {"auml",        0xe4},
          321         {"ayin",        0x639},                /* arabic ain U+0639 */
          322         {"b1",                0x2d},                /* single bond */
          323         {"b2",                0x3d},                /* double bond */
          324         {"b3",                0x2261},                /* triple bond */
          325         {"bbar",        0x180},                /* b with bar U+0180 */
          326         {"beta",        0x3b2},
          327         {"bigobl",        0x2f},
          328         {"blC",                0x43},                /* should be black letter */
          329         {"blJ",                0x4a},                /* should be black letter */
          330         {"blU",                0x55},                /* should be black letter */
          331         {"blb",                0x62},                /* should be black letter */
          332         {"blozenge",        0x25ca},                /* U+25CA; should be black */
          333         {"bly",                0x79},                /* should be black letter */
          334         {"bra",                MBRA},
          335         {"brbl",        LBRB},
          336         {"breve",        LBRV},
          337         {"bslash",        '\\'},
          338         {"bsquare",        0x25a0},                /* black square U+25A0 */
          339         {"btril",        0x25c0},                /* U+25C0 */
          340         {"btrir",        0x25b6},                /* U+25B6 */
          341         {"c",                0x63},                /* circa */
          342         {"cab",                0x232a},
          343         {"cacu",        0x107},
          344         {"canc",        0x264b},
          345         {"capr",        0x2651},
          346         {"caret",        0x5e},
          347         {"cb",                0x7d},
          348         {"cbigb",        0x7d},
          349         {"cbigpren",        0x29},
          350         {"cbigsb",        0x5d},
          351         {"cced",        0xe7},
          352         {"cdil",        LCED},
          353         {"cdsb",        0x301b},                /* ]] U+301b */
          354         {"cent",        0xa2},
          355         {"chacek",        0x10d},
          356         {"chi",                0x3c7},
          357         {"circ",        LRNG},
          358         {"circa",        0x63},                /* about (year) */
          359         {"circbl",        0x325},                /* ring below accent U+0325 */
          360         {"circle",        0x25cb},                /* U+25CB */
          361         {"circledot",        0x2299},
          362         {"click",        0x296},
          363         {"club",        0x2663},
          364         {"comtime",        0x43},
          365         {"conj",        0x260c},
          366         {"cprt",        0xa9},
          367         {"cq",                '\''},
          368         {"cqq",                0x201d},
          369         {"cross",        0x2720},                /* maltese cross U+2720 */
          370         {"crotchet",        0x2669},
          371         {"csb",                0x5d},
          372         {"ctilde",        0x63},                /* +tilde */
          373         {"ctlig",        MLCT},
          374         {"cyra",        0x430},
          375         {"cyre",        0x435},
          376         {"cyrhard",        0x44a},
          377         {"cyrjat",        0x463},
          378         {"cyrm",        0x43c},
          379         {"cyrn",        0x43d},
          380         {"cyrr",        0x440},
          381         {"cyrsoft",        0x44c},
          382         {"cyrt",        0x442},
          383         {"cyry",        0x44b},
          384         {"dag",                0x2020},
          385         {"dbar",        0x111},
          386         {"dblar",        0x21cb},
          387         {"dblgt",        0x226b},
          388         {"dbllt",        0x226a},
          389         {"dced",        0x64},                /* +cedilla */
          390         {"dd",                MDD},
          391         {"ddag",        0x2021},
          392         {"ddd",                MDDD},
          393         {"decr",        0x2193},
          394         {"deg",                0xb0},
          395         {"dele",        0x64},                /* should be dele */
          396         {"delta",        0x3b4},
          397         {"descnode",        0x260b},                /* descending node U+260B */
          398         {"diamond",        0x2662},
          399         {"digamma",        0x3dd},
          400         {"div",                0xf7},
          401         {"dlessi",        0x131},
          402         {"dlessj1",        0x6a},                /* should be dotless */
          403         {"dlessj2",        0x6a},                /* should be dotless */
          404         {"dlessj3",        0x6a},                /* should be dotless */
          405         {"dollar",        0x24},
          406         {"dotab",        LDOT},
          407         {"dotbl",        LDTB},
          408         {"drachm",        0x292},
          409         {"dubh",        0x2d},
          410         {"eacu",        0xe9},
          411         {"earth",        0x2641},
          412         {"easper",        MEAS},
          413         {"ebreve",        0x115},
          414         {"ecirc",        0xea},
          415         {"edh",                0xf0},
          416         {"egrave",        0xe8},
          417         {"ehacek",        0x11b},
          418         {"ehook",        0x119},
          419         {"elem",        0x220a},
          420         {"elenis",        MELN},
          421         {"em",                0x2014},
          422         {"emac",        0x113},
          423         {"emem",        MEMM},
          424         {"en",                0x2013},
          425         {"epsilon",        0x3b5},
          426         {"equil",        0x21cb},
          427         {"ergo",        0x2234},
          428         {"es",                MES},
          429         {"eszett",        0xdf},
          430         {"eta",                0x3b7},
          431         {"eth",                0xf0},
          432         {"euml",        0xeb},
          433         {"expon",        0x2191},
          434         {"fact",        0x21},
          435         {"fata",        0x251},
          436         {"fatpara",        0xb6},                /* should have fatter, filled in bowl */
          437         {"female",        0x2640},
          438         {"ffilig",        MLFFI},
          439         {"fflig",        MLFF},
          440         {"ffllig",        MLFFL},
          441         {"filig",        MLFI},
          442         {"flat",        0x266d},
          443         {"fllig",        MLFL},
          444         {"frE",                0x45},                /* should be curly */
          445         {"frL",                'L'},                /* should be curly */
          446         {"frR",                0x52},                /* should be curly */
          447         {"frakB",        0x42},                /* should have fraktur style */
          448         {"frakG",        0x47},
          449         {"frakH",        0x48},
          450         {"frakI",        0x49},
          451         {"frakM",        0x4d},
          452         {"frakU",        0x55},
          453         {"frakX",        0x58},
          454         {"frakY",        0x59},
          455         {"frakh",        0x68},
          456         {"frbl",        LFRB},
          457         {"frown",        LFRN},
          458         {"fs",                0x20},
          459         {"fsigma",        0x3c2},
          460         {"gAacu",        0xc1},                /* should be Α+acute */
          461         {"gaacu",        0x3b1},                /* +acute */
          462         {"gabreve",        0x3b1},                /* +breve */
          463         {"gafrown",        0x3b1},                /* +frown */
          464         {"gagrave",        0x3b1},                /* +grave */
          465         {"gamac",        0x3b1},                /* +macron */
          466         {"gamma",        0x3b3},
          467         {"gauml",        0x3b1},                /* +umlaut */
          468         {"ge",                0x2267},
          469         {"geacu",        0x3b5},                /* +acute */
          470         {"gegrave",        0x3b5},                /* +grave */
          471         {"ghacu",        0x3b7},                /* +acute */
          472         {"ghfrown",        0x3b7},                /* +frown */
          473         {"ghgrave",        0x3b7},                /* +grave */
          474         {"ghmac",        0x3b7},                /* +macron */
          475         {"giacu",        0x3b9},                /* +acute */
          476         {"gibreve",        0x3b9},                /* +breve */
          477         {"gifrown",        0x3b9},                /* +frown */
          478         {"gigrave",        0x3b9},                /* +grave */
          479         {"gimac",        0x3b9},                /* +macron */
          480         {"giuml",        0x3b9},                /* +umlaut */
          481         {"glagjat",        0x467},
          482         {"glots",        0x2c0},
          483         {"goacu",        0x3bf},                /* +acute */
          484         {"gobreve",        0x3bf},                /* +breve */
          485         {"grave",        LGRV},
          486         {"gt",                0x3e},
          487         {"guacu",        0x3c5},                /* +acute */
          488         {"gufrown",        0x3c5},                /* +frown */
          489         {"gugrave",        0x3c5},                /* +grave */
          490         {"gumac",        0x3c5},                /* +macron */
          491         {"guuml",        0x3c5},                /* +umlaut */
          492         {"gwacu",        0x3c9},                /* +acute */
          493         {"gwfrown",        0x3c9},                /* +frown */
          494         {"gwgrave",        0x3c9},                /* +grave */
          495         {"hacek",        LHCK},
          496         {"halft",        0x2308},
          497         {"hash",        0x23},
          498         {"hasper",        MHAS},
          499         {"hatpath",        0x5b2},                /* hataf patah U+05B2 */
          500         {"hatqam",        0x5b3},                /* hataf qamats U+05B3 */
          501         {"hatseg",        0x5b1},                /* hataf segol U+05B1 */
          502         {"hbar",        0x127},
          503         {"heart",        0x2661},
          504         {"hebaleph",        0x5d0},                /* aleph U+05D0 */
          505         {"hebayin",        0x5e2},                /* ayin U+05E2 */
          506         {"hebbet",        0x5d1},                /* bet U+05D1 */
          507         {"hebbeth",        0x5d1},                /* bet U+05D1 */
          508         {"hebcheth",        0x5d7},                /* bet U+05D7 */
          509         {"hebdaleth",        0x5d3},                /* dalet U+05D3 */
          510         {"hebgimel",        0x5d2},                /* gimel U+05D2 */
          511         {"hebhe",        0x5d4},                /* he U+05D4 */
          512         {"hebkaph",        0x5db},                /* kaf U+05DB */
          513         {"heblamed",        0x5dc},                /* lamed U+05DC */
          514         {"hebmem",        0x5de},                /* mem U+05DE */
          515         {"hebnun",        0x5e0},                /* nun U+05E0 */
          516         {"hebnunfin",        0x5df},                /* final nun U+05DF */
          517         {"hebpe",        0x5e4},                /* pe U+05E4 */
          518         {"hebpedag",        0x5e3},                /* final pe? U+05E3 */
          519         {"hebqoph",        0x5e7},                /* qof U+05E7 */
          520         {"hebresh",        0x5e8},                /* resh U+05E8 */
          521         {"hebshin",        0x5e9},                /* shin U+05E9 */
          522         {"hebtav",        0x5ea},                /* tav U+05EA */
          523         {"hebtsade",        0x5e6},                /* tsadi U+05E6 */
          524         {"hebwaw",        0x5d5},                /* vav? U+05D5 */
          525         {"hebyod",        0x5d9},                /* yod U+05D9 */
          526         {"hebzayin",        0x5d6},                /* zayin U+05D6 */
          527         {"hgz",                0x292},                /* ??? Cf "alet" */
          528         {"hireq",        0x5b4},                /* U+05B4 */
          529         {"hlenis",        MHLN},
          530         {"hook",        LOGO},
          531         {"horizE",        0x45},                /* should be on side */
          532         {"horizP",        0x50},                /* should be on side */
          533         {"horizS",        0x223d},
          534         {"horizT",        0x22a3},
          535         {"horizb",        0x7b},                /* should be underbrace */
          536         {"ia",                0x3b1},
          537         {"iacu",        0xed},
          538         {"iasper",        MIAS},
          539         {"ib",                0x3b2},
          540         {"ibar",        0x268},
          541         {"ibreve",        0x12d},
          542         {"icirc",        0xee},
          543         {"id",                0x3b4},
          544         {"ident",        0x2261},
          545         {"ie",                0x3b5},
          546         {"ifilig",        MLFI},
          547         {"ifflig",        MLFF},
          548         {"ig",                0x3b3},
          549         {"igrave",        0xec},
          550         {"ih",                0x3b7},
          551         {"ii",                0x3b9},
          552         {"ik",                0x3ba},
          553         {"ilenis",        MILN},
          554         {"imac",        0x12b},
          555         {"implies",        0x21d2},
          556         {"index",        0x261e},
          557         {"infin",        0x221e},
          558         {"integ",        0x222b},
          559         {"intsec",        0x2229},
          560         {"invpri",        0x2cf},
          561         {"iota",        0x3b9},
          562         {"iq",                0x3c8},
          563         {"istlig",        MLST},
          564         {"isub",        0x3f5},                /* iota below accent */
          565         {"iuml",        0xef},
          566         {"iz",                0x3b6},
          567         {"jup",                0x2643},
          568         {"kappa",        0x3ba},
          569         {"koppa",        0x3df},
          570         {"lambda",        0x3bb},
          571         {"lar",                0x2190},
          572         {"lbar",        0x142},
          573         {"le",                0x2266},
          574         {"lenis",        LLEN},
          575         {"leo",                0x264c},
          576         {"lhalfbr",        0x2308},
          577         {"lhshoe",        0x2283},
          578         {"libra",        0x264e},
          579         {"llswing",        MLLS},
          580         {"lm",                0x2d0},
          581         {"logicand",        0x2227},
          582         {"logicor",        0x2228},
          583         {"longs",        0x283},
          584         {"lrar",        0x2194},
          585         {"lt",                0x3c},
          586         {"ltappr",        0x227e},
          587         {"ltflat",        0x2220},
          588         {"lumlbl",        0x6c},                /* +umlaut below */
          589         {"mac",                LMAC},
          590         {"male",        0x2642},
          591         {"mc",                0x63},                /* should be raised */
          592         {"merc",        0x263f},                /* mercury U+263F */
          593         {"min",                0x2212},
          594         {"moonfq",        0x263d},                /* first quarter moon U+263D */
          595         {"moonlq",        0x263e},                /* last quarter moon U+263E */
          596         {"msylab",        0x6d},                /* +sylab (ˌ) */
          597         {"mu",                0x3bc},
          598         {"nacu",        0x144},
          599         {"natural",        0x266e},
          600         {"neq",                0x2260},
          601         {"nfacu",        0x2032},
          602         {"nfasper",        0x2bd},
          603         {"nfbreve",        0x2d8},
          604         {"nfced",        0xb8},
          605         {"nfcirc",        0x2c6},
          606         {"nffrown",        0x2322},
          607         {"nfgra",        0x2cb},
          608         {"nfhacek",        0x2c7},
          609         {"nfmac",        0xaf},
          610         {"nftilde",        0x2dc},
          611         {"nfuml",        0xa8},
          612         {"ng",                0x14b},
          613         {"not",                0xac},
          614         {"notelem",        0x2209},
          615         {"ntilde",        0xf1},
          616         {"nu",                0x3bd},
          617         {"oab",                0x2329},
          618         {"oacu",        0xf3},
          619         {"oasper",        MOAS},
          620         {"ob",                0x7b},
          621         {"obar",        0xf8},
          622         {"obigb",        0x7b},                /* should be big */
          623         {"obigpren",        0x28},
          624         {"obigsb",        0x5b},                /* should be big */
          625         {"obreve",        0x14f},
          626         {"ocirc",        0xf4},
          627         {"odsb",        0x301a},                /* [[ U+301A */
          628         {"oe",                0x153},
          629         {"oeamp",        0x26},
          630         {"ograve",        0xf2},
          631         {"ohook",        0x6f},                /* +hook */
          632         {"olenis",        MOLN},
          633         {"omac",        0x14d},
          634         {"omega",        0x3c9},
          635         {"omicron",        0x3bf},
          636         {"ope",                0x25b},
          637         {"opp",                0x260d},
          638         {"oq",                0x60},
          639         {"oqq",                0x201c},
          640         {"or",                MOR},
          641         {"osb",                0x5b},
          642         {"otilde",        0xf5},
          643         {"ouml",        0xf6},
          644         {"ounce",        0x2125},                /* ounce U+2125 */
          645         {"ovparen",        0x2322},                /* should be sideways ( */
          646         {"p",                0x2032},
          647         {"pa",                0x2202},
          648         {"page",        0x50},
          649         {"pall",        0x28e},
          650         {"paln",        0x272},
          651         {"par",                PAR},
          652         {"para",        0xb6},
          653         {"pbar",        0x70},                /* +bar */
          654         {"per",                0x2118},                /* per U+2118 */
          655         {"phi",                0x3c6},
          656         {"phi2",        0x3d5},
          657         {"pi",                0x3c0},
          658         {"pisces",        0x2653},
          659         {"planck",        0x127},
          660         {"plantinJ",        0x4a},                /* should be script */
          661         {"pm",                0xb1},
          662         {"pmil",        0x2030},
          663         {"pp",                0x2033},
          664         {"ppp",                0x2034},
          665         {"prop",        0x221d},
          666         {"psi",                0x3c8},
          667         {"pstlg",        0xa3},
          668         {"q",                0x3f},                /* should be raised */
          669         {"qamets",        0x5b3},                /* U+05B3 */
          670         {"quaver",        0x266a},
          671         {"rar",                0x2192},
          672         {"rasper",        MRAS},
          673         {"rdot",        0xb7},
          674         {"recipe",        0x211e},                /* U+211E */
          675         {"reg",                0xae},
          676         {"revC",        0x186},                /* open O U+0186 */
          677         {"reva",        0x252},
          678         {"revc",        0x254},
          679         {"revope",        0x25c},
          680         {"revr",        0x279},
          681         {"revsc",        0x2d2},                /* upside-down semicolon */
          682         {"revv",        0x28c},
          683         {"rfa",                0x6f},                /* +hook (Cf "goal") */
          684         {"rhacek",        0x159},
          685         {"rhalfbr",        0x2309},
          686         {"rho",                0x3c1},
          687         {"rhshoe",        0x2282},
          688         {"rlenis",        MRLN},
          689         {"rsylab",        0x72},                /* +sylab */
          690         {"runash",        0x46},                /* should be runic 'ash' */
          691         {"rvow",        0x2d4},
          692         {"sacu",        0x15b},
          693         {"sagit",        0x2650},
          694         {"sampi",        0x3e1},
          695         {"saturn",        0x2644},
          696         {"sced",        0x15f},
          697         {"schwa",        0x259},
          698         {"scorpio",        0x264f},
          699         {"scrA",        0x41},                /* should be script */
          700         {"scrC",        0x43},
          701         {"scrE",        0x45},
          702         {"scrF",        0x46},
          703         {"scrI",        0x49},
          704         {"scrJ",        0x4a},
          705         {"scrL",        'L'},
          706         {"scrO",        0x4f},
          707         {"scrP",        0x50},
          708         {"scrQ",        0x51},
          709         {"scrS",        0x53},
          710         {"scrT",        0x54},
          711         {"scrb",        0x62},
          712         {"scrd",        0x64},
          713         {"scrh",        0x68},
          714         {"scrl",        0x6c},
          715         {"scruple",        0x2108},                /* U+2108 */
          716         {"sdd",                0x2d0},
          717         {"sect",        0xa7},
          718         {"semE",        0x2203},
          719         {"sh",                0x283},
          720         {"shacek",        0x161},
          721         {"sharp",        0x266f},
          722         {"sheva",        0x5b0},                /* U+05B0 */
          723         {"shti",        0x26a},
          724         {"shtsyll",        0x222a},
          725         {"shtu",        0x28a},
          726         {"sidetri",        0x22b2},
          727         {"sigma",        0x3c3},
          728         {"since",        0x2235},
          729         {"slge",        0x2265},                /* should have slanted line under */
          730         {"slle",        0x2264},                /* should have slanted line under */
          731         {"sm",                0x2c8},
          732         {"smm",                0x2cc},
          733         {"spade",        0x2660},
          734         {"sqrt",        0x221a},
          735         {"square",        0x25a1},                /* U+25A1 */
          736         {"ssChi",        0x3a7},                /* should be sans serif */
          737         {"ssIota",        0x399},
          738         {"ssOmicron",        0x39f},
          739         {"ssPi",        0x3a0},
          740         {"ssRho",        0x3a1},
          741         {"ssSigma",        0x3a3},
          742         {"ssTau",        0x3a4},
          743         {"star",        0x2a},
          744         {"stlig",        MLST},
          745         {"sup2",        0x2072},
          746         {"supgt",        0x2c3},
          747         {"suplt",        0x2c2},
          748         {"sur",                0x2b3},
          749         {"swing",        0x223c},
          750         {"tau",                0x3c4},
          751         {"taur",        0x2649},
          752         {"th",                0xfe},
          753         {"thbar",        0xfe},                /* +bar */
          754         {"theta",        0x3b8},
          755         {"thinqm",        0x3f},                /* should be thinner */
          756         {"tilde",        LTIL},
          757         {"times",        0xd7},
          758         {"tri",                0x2206},
          759         {"trli",        0x2016},
          760         {"ts",                0x2009},
          761         {"uacu",        0xfa},
          762         {"uasper",        MUAS},
          763         {"ubar",        0x75},                /* +bar */
          764         {"ubreve",        0x16d},
          765         {"ucirc",        0xfb},
          766         {"udA",                0x2200},
          767         {"udT",                0x22a5},
          768         {"uda",                0x250},
          769         {"udh",                0x265},
          770         {"udqm",        0xbf},
          771         {"udpsi",        0x22d4},
          772         {"udtr",        0x2207},
          773         {"ugrave",        0xf9},
          774         {"ulenis",        MULN},
          775         {"umac",        0x16b},
          776         {"uml",                LUML},
          777         {"undl",        0x2cd},                /* underline accent */
          778         {"union",        0x222a},
          779         {"upsilon",        0x3c5},
          780         {"uuml",        0xfc},
          781         {"vavpath",        0x5d5},                /* vav U+05D5 (+patah) */
          782         {"vavsheva",        0x5d5},                /* vav U+05D5 (+sheva) */
          783         {"vb",                0x7c},
          784         {"vddd",        0x22ee},
          785         {"versicle2",        0x2123},                /* U+2123 */
          786         {"vinc",        0xaf},
          787         {"virgo",        0x264d},
          788         {"vpal",        0x25f},
          789         {"vvf",                0x263},
          790         {"wasper",        MWAS},
          791         {"wavyeq",        0x2248},
          792         {"wlenis",        MWLN},
          793         {"wyn",                0x1bf},                /* wynn U+01BF */
          794         {"xi",                0x3be},
          795         {"yacu",        0xfd},
          796         {"ycirc",        0x177},
          797         {"ygh",                0x292},
          798         {"ymac",        0x79},                /* +macron */
          799         {"yuml",        0xff},
          800         {"zced",        0x7a},                /* +cedilla */
          801         {"zeta",        0x3b6},
          802         {"zh",                0x292},
          803         {"zhacek",        0x17e}
          804 };
          805 /*
          806    The following special characters don't have close enough
          807    equivalents in Unicode, so aren't in the above table.
          808         22n                2^(2^n) Cf Fermat
          809         2on4                2/4
          810         3on8                3/8
          811         Bantuo                Bantu O. Cf Otshi-herero
          812         Car                C with circular arrow on top
          813         albrtime         cut-time: C with vertical line
          814         ardal                Cf dental
          815         bantuo                Bantu o. Cf Otshi-herero
          816         bbc1                single chem bond below
          817         bbc2                double chem bond below
          818         bbl1                chem bond like /
          819         bbl2                chem bond like //
          820         bbr1                chem bond like \
          821         bbr2                chem bond like \\
          822         bcop1                copper symbol. Cf copper
          823         bcop2                copper symbol. Cf copper
          824         benchm                Cf benchmark
          825         btc1                single chem bond above
          826         btc2                double chem bond above
          827         btl1                chem bond like \
          828         btl2                chem bond like \\
          829         btr1                chem bond like /
          830         btr2                chem bond like //
          831         burman                Cf Burman
          832         devph                sanskrit letter. Cf ph
          833         devrfls                sanskrit letter. Cf cerebral
          834         duplong[12]        musical note
          835         egchi                early form of chi
          836         eggamma[12]        early form of gamma
          837         egiota                early form of iota
          838         egkappa                early form of kappa
          839         eglambda        early form of lambda
          840         egmu[12]        early form of mu
          841         egnu[12]        early form of nu
          842         egpi[123]        early form of pi
          843         egrho[12]        early form of rho
          844         egsampi                early form of sampi
          845         egsan                early form of san
          846         egsigma[12]        early form of sigma
          847         egxi[123]        early form of xi
          848         elatS                early form of S
          849         elatc[12]        early form of C
          850         elatg[12]        early form of G
          851         glagjeri        Slavonic Glagolitic jeri
          852         glagjeru        Slavonic Glagolitic jeru
          853         hypolem                hypolemisk (line with underdot)
          854         lhrbr                lower half }
          855         longmord        long mordent
          856         mbwvow                backwards stretched C. Cf retract.
          857         mord                music symbol.  Cf mordent
          858         mostra                Cf direct
          859         ohgcirc                old form of circumflex
          860         oldbeta                old form of β. Cf perturbate
          861         oldsemibr[12]        old forms of semibreve. Cf prolation
          862         ormg                old form of g. Cf G
          863         para[12345]        form of ¶
          864         pauseo                musical pause sign
          865         pauseu                musical pause sign
          866         pharyng                Cf pharyngal
          867         ragr                Black letter ragged r
          868         repetn                musical repeat. Cf retort
          869         segno                musical segno sign
          870         semain[12]        semitic ain
          871         semhe                semitic he
          872         semheth                semitic heth
          873         semkaph                semitic kaph
          874         semlamed[12]        semitic lamed
          875         semmem                semitic mem
          876         semnum                semitic nun
          877         sempe                semitic pe
          878         semqoph[123]        semitic qoph
          879         semresh                semitic resh
          880         semtav[1234]        semitic tav
          881         semyod                semitic yod
          882         semzayin[123]        semitic zayin
          883         shtlong[12]        U with underbar. Cf glyconic
          884         sigmatau        σ,τ combination
          885         squaver                sixteenth note
          886         sqbreve                square musical breve note
          887         swast                swastika
          888         uhrbr                upper half of big }
          889         versicle1                Cf versicle
          890  */
          891 
          892 
          893 static Rune normtab[128] = {        /* default translation table; oedprintentry starts with it and indexes it with (byte & 0x7F) */
          894         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          895 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        /* 0x00-0x1f are all NONE */
          896         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          897 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          898         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          899 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',        /* '&' -> SPCS: start of a special character name */
          900         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          901 /*30*/  0x30,        0x31,        0x32,        0x33,        0x34,        0x35,        0x36,        0x37,
          902         0x38,        0x39,        0x3a,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,        /* '<' -> TAGS, '>' -> TAGE (presumably tag delimiters -- confirm) */
          903 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,
          904         0x48,        0x49,        0x4a,        0x4b,        'L',        0x4d,        0x4e,        0x4f,
          905 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          906         0x58,        0x59,        0x5a,        0x5b,        '\\',        0x5d,        0x5e,        0x5f,
          907 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          908         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          909 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          910         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE        /* 0x7f -> NONE */
          911 };
          912 static Rune phtab[128] = {        /* ASCII -> phonetic (IPA) code points; NOTE(review): presumably selected for pronunciation text -- confirm where transtab is switched */
          913         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          914 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        /* 0x00-0x1f are all NONE */
          915         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          916 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          917         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          918 /*20*/        0x20,        0x21,        0x2c8,        0x23,        0x24,        0x2cc,        0xe6,        '\'',        /* '"' -> U+02C8, '%' -> U+02CC, '&' -> U+00E6 (no SPCS entry in this table) */
          919         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          920 /*30*/  0x30,        0x31,        0x32,        0x25c,        0x34,        0x35,        0x36,        0x37,        /* '3' -> U+025C */
          921         0x38,        0xf8,        0x2d0,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,        /* '9' -> U+00F8, ':' -> U+02D0; '<' and '>' stay TAGS/TAGE */
          922 /*40*/  0x259,        0x251,        0x42,        0x43,        0xf0,        0x25b,        0x46,        0x47,        /* '@' -> U+0259, 'A' -> U+0251, 'D' -> U+00F0, 'E' -> U+025B */
          923         0x48,        0x26a,        0x4a,        0x4b,        'L',        0x4d,        0x14b,        0x254,        /* 'I' -> U+026A, 'N' -> U+014B, 'O' -> U+0254 */
          924 /*50*/        0x50,        0x252,        0x52,        0x283,        0x3b8,        0x28a,        0x28c,        0x57,        /* 'Q' -> U+0252, 'S' -> U+0283, 'T' -> U+03B8, 'U' -> U+028A, 'V' -> U+028C */
          925         0x58,        0x59,        0x292,        0x5b,        '\\',        0x5d,        0x5e,        0x5f,        /* 'Z' -> U+0292 */
          926 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,        /* lower-case letters map to themselves */
          927         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          928 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          929         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE        /* 0x7f -> NONE */
          930 };
          931 static Rune grtab[128] = {        /* Latin letters -> Greek code points (U+0391.. capitals, U+03B1.. small); NOTE(review): presumably selected for Greek text -- confirm where transtab is switched */
          932         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          933 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        /* 0x00-0x1f are all NONE */
          934         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          935 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          936         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          937 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',        /* punctuation and digits as in normtab; '&' -> SPCS */
          938         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          939 /*30*/  0x30,        0x31,        0x32,        0x33,        0x34,        0x35,        0x36,        0x37,
          940         0x38,        0x39,        0x3a,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,
          941 /*40*/  0x40,        0x391,        0x392,        0x39e,        0x394,        0x395,        0x3a6,        0x393,        /* A B C D E F G -> Alpha Beta Xi Delta Epsilon Phi Gamma */
          942         0x397,        0x399,        0x3da,        0x39a,        0x39b,        0x39c,        0x39d,        0x39f,        /* H I J K L M N O -> Eta Iota U+03DA Kappa Lambda Mu Nu Omicron */
          943 /*50*/        0x3a0,        0x398,        0x3a1,        0x3a3,        0x3a4,        0x3a5,        0x56,        0x3a9,        /* P Q R S T U W -> Pi Theta Rho Sigma Tau Upsilon Omega; 'V' stays Latin */
          944         0x3a7,        0x3a8,        0x396,        0x5b,        '\\',        0x5d,        0x5e,        0x5f,        /* X Y Z -> Chi Psi Zeta */
          945 /*60*/        0x60,        0x3b1,        0x3b2,        0x3be,        0x3b4,        0x3b5,        0x3c6,        0x3b3,        /* small letters follow the same letter pairing as the capitals */
          946         0x3b7,        0x3b9,        0x3c2,        0x3ba,        0x3bb,        0x3bc,        0x3bd,        0x3bf,        /* 'j' -> U+03C2 (final sigma) */
          947 /*70*/        0x3c0,        0x3b8,        0x3c1,        0x3c3,        0x3c4,        0x3c5,        0x76,        0x3c9,        /* 'v' stays Latin */
          948         0x3c7,        0x3c8,        0x3b6,        0x7b,        0x7c,        0x7d,        0x7e,        NONE        /* 0x7f -> NONE */
          949 };
          950 static Rune subtab[128] = {        /* same entries as normtab except that digits and ( ) + - = map to Unicode subscript forms (U+2080-U+208E) */
          951         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          952 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        /* 0x00-0x1f are all NONE */
          953         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          954 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          955         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          956 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',        /* '&' -> SPCS */
          957         0x208d,        0x208e,        0x2a,        0x208a,        0x2c,        0x208b,        0x2e,        0x2f,        /* '(' ')' '+' '-' -> U+208D U+208E U+208A U+208B */
          958 /*30*/  0x2080,        0x2081,        0x2082,        0x2083,        0x2084,        0x2085,        0x2086,        0x2087,        /* '0'..'7' -> U+2080..U+2087 */
          959         0x2088,        0x2089,        0x3a,        0x3b,        TAGS,        0x208c,        TAGE,        0x3f,        /* '8' '9' -> U+2088 U+2089; '=' -> U+208C */
          960 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,        /* letters have no subscript mapping here; they map to themselves */
          961         0x48,        0x49,        0x4a,        0x4b,        'L',        0x4d,        0x4e,        0x4f,
          962 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          963         0x58,        0x59,        0x5a,        0x5b,        '\\',        0x5d,        0x5e,        0x5f,
          964 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          965         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          966 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          967         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE        /* 0x7f -> NONE */
          968 };
          969 static Rune suptab[128] = {        /* same entries as normtab except that digits and ( ) + - = map to Unicode superscript forms */
          970         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          971 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        /* 0x00-0x1f are all NONE */
          972         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          973 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          974         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          975 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',        /* '&' -> SPCS */
          976         0x207d,        0x207e,        0x2a,        0x207a,        0x2c,        0x207b,        0x2e,        0x2f,        /* '(' ')' '+' '-' -> U+207D U+207E U+207A U+207B */
          977 /*30*/  0x2070,        0xb9,        0xb2,        0xb3,        0x2074,        0x2075,        0x2076,        0x2077,        /* superscript 1, 2, 3 are U+00B9, U+00B2, U+00B3; U+2071 is superscript i and U+2072/U+2073 are unassigned */
          978         0x2078,        0x2079,        0x3a,        0x3b,        TAGS,        0x207c,        TAGE,        0x3f,        /* '8' '9' -> U+2078 U+2079; '=' -> U+207C */
          979 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,        /* letters have no superscript mapping here; they map to themselves */
          980         0x48,        0x49,        0x4a,        0x4b,        'L',        0x4d,        0x4e,        0x4f,
          981 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          982         0x58,        0x59,        0x5a,        0x5b,        '\\',        0x5d,        0x5e,        0x5f,
          983 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          984         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          985 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          986         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE        /* 0x7f -> NONE */
          987 };
          988 
          989 static int        tagstarts;        /* NOTE(review): not used in the visible code; presumably set while parsing a tag -- confirm */
          990 static char        tag[Buflen];        /* NOTE(review): presumably the tag name filled in by gettag -- confirm */
          991 static int        naux;        /* NOTE(review): presumably the number of auxname/auxval pairs in use -- confirm */
          992 static char        auxname[Maxaux][Buflen];        /* up to Maxaux names, each at most Buflen bytes */
          993 static char        auxval[Maxaux][Buflen];        /* up to Maxaux values, parallel to auxname by declaration */
          994 static char        spec[Buflen];        /* special character name looked up in spectab right after getspec() is called */
          995 static char        *auxstate[Naux];        /* vals for most recent tag */
          996 static Entry        curentry;        /* set by oedprintentry to the entry it is printing */
          997 #define cursize (curentry.end-curentry.start)        /* distance between curentry's start and end pointers */
          998 
          999 static char        *getspec(char *, char *);        /* called as p = getspec(p, pe): takes the current position and the end of the entry, returns the new position */
         1000 static char        *gettag(char *, char *);        /* same signature as getspec; NOTE(review): presumably parses a tag -- confirm */
         1001 static void        dostatus(void);
         1002 
         1003 /*
         1004  * cmd is one of:
         1005  *    'p': normal print
         1006  *    'h': just print headwords
         1007  *    'r': print raw (the code below tests for 'r'; confirm callers never pass 'P')
         1008  */
         1009 void
         1010 oedprintentry(Entry e, int cmd)
         1011 {
         1012         char *p, *pe;
         1013         int t, a, i;
         1014         long r, rprev, rlig;
         1015         Rune *transtab;
         1016 
         1017         p = e.start;
         1018         pe = e.end;
         1019         transtab = normtab;
         1020         rprev = NONE;
         1021         changett(0, 0, 0);
         1022         curentry = e;
         1023         if(cmd == 'h')
         1024                 outinhibit = 1;
         1025         while(p < pe) {
         1026                 if(cmd == 'r') {
         1027                         outchar(*p++);
         1028                         continue;
         1029                 }
         1030                 r = transtab[(*p++)&0x7F];
         1031                 if(r < NONE) {
         1032                         /* Emit the rune, but buffer in case of ligature */
         1033                         if(rprev != NONE)
         1034                                 outrune(rprev);
         1035                         rprev = r;
         1036                 } else if(r == SPCS) {
         1037                         /* Start of special character name */
         1038                         p = getspec(p, pe);
         1039                         r = lookassoc(spectab, asize(spectab), spec);
         1040                         if(r == -1) {
         1041                                 if(debug)
         1042                                         err("spec %ld %d %s",
         1043                                                 e.doff, cursize, spec);
         1044                                 r = 0xfffd;
         1045                         }
         1046                         if(r >= LIGS && r < LIGE) {
         1047                                 /* handle possible ligature */
         1048                                 rlig = liglookup(r, rprev);
         1049                                 if(rlig != NONE)
         1050                                         rprev = rlig;        /* overwrite rprev */
         1051                                 else {
         1052                                         /* could print accent, but let's not */
         1053                                         if(rprev != NONE) outrune(rprev);
         1054                                         rprev = NONE;
         1055                                 }
         1056                         } else if(r >= MULTI && r < MULTIE) {
         1057                                 if(rprev != NONE) {
         1058                                         outrune(rprev);
         1059                                         rprev = NONE;
         1060                                 }
         1061                                 outrunes(multitab[r-MULTI]);
         1062                         } else if(r == PAR) {
         1063                                 if(rprev != NONE) {
         1064                                         outrune(rprev);
         1065                                         rprev = NONE;
         1066                                 }
         1067                                 outnl(1);
         1068                         } else {
         1069                                 if(rprev != NONE) outrune(rprev);
         1070                                 rprev = r;
         1071                         }
         1072                 } else if(r == TAGS) {
         1073                         /* Start of tag name */
         1074                         if(rprev != NONE) {
         1075                                 outrune(rprev);
         1076                                 rprev = NONE;
         1077                         }
         1078                         p = gettag(p, pe);
         1079                         t = lookassoc(tagtab, asize(tagtab), tag);
         1080                         if(t == -1) {
         1081                                 if(debug)
         1082                                         err("tag %ld %d %s",
         1083                                                 e.doff, cursize, tag);
         1084                                 continue;
         1085                         }
         1086                         for(i = 0; i < Naux; i++)
         1087                                 auxstate[i] = 0;
         1088                         for(i = 0; i < naux; i++) {
         1089                                 a = lookassoc(auxtab, asize(auxtab), auxname[i]);
         1090                                 if(a == -1) {
         1091                                         if(debug)
         1092                                                 err("aux %ld %d %s",
         1093                                                         e.doff, cursize, auxname[i]);
         1094                                 } else
         1095                                         auxstate[a] = auxval[i];
         1096                         }
         1097                         switch(t){
         1098                         case E:
         1099                         case Ve:
         1100                                 outnl(0);
         1101                                 if(tagstarts)
         1102                                         dostatus();
         1103                                 break;
         1104                         case Ed:
         1105                         case Etym:
         1106                                 outchar(tagstarts? '[' : ']');
         1107                                 break;
         1108                         case Pr:
         1109                                 outchar(tagstarts? '(' : ')');
         1110                                 break;
         1111                         case In:
         1112                                 transtab = changett(transtab, subtab, tagstarts);
         1113                                 break;
         1114                         case Hm:
         1115                         case Su:
         1116                         case Fq:
         1117                                 transtab = changett(transtab, suptab, tagstarts);
         1118                                 break;
         1119                         case Gk:
         1120                                 transtab = changett(transtab, grtab, tagstarts);
         1121                                 break;
         1122                         case Ph:
         1123                                 transtab = changett(transtab, phtab, tagstarts);
         1124                                 break;
         1125                         case Hw:
         1126                                 if(cmd == 'h') {
         1127                                         if(!tagstarts)
         1128                                                 outchar(' ');
         1129                                         outinhibit = !tagstarts;
         1130                                 }
         1131                                 break;
         1132                         case S0:
         1133                         case S1:
         1134                         case S2:
         1135                         case S3:
         1136                         case S4:
         1137                         case S5:
         1138                         case S6:
         1139                         case S7a:
         1140                         case S7n:
         1141                         case Sn:
         1142                         case Sgk:
         1143                                 if(tagstarts) {
         1144                                         outnl(2);
         1145                                         dostatus();
         1146                                         if(auxstate[Num]) {
         1147                                                 if(t == S3 || t == S5) {
         1148                                                         i = atoi(auxstate[Num]);
         1149                                                         while(i--)
         1150                                                                 outchar('*');
         1151                                                         outchars("  ");
         1152                                                 } else if(t == S7a || t == S7n || t == Sn) {
         1153                                                         outchar('(');
         1154                                                         outchars(auxstate[Num]);
         1155                                                         outchars(") ");
         1156                                                 } else if(t == Sgk) {
         1157                                                         i = grtab[(uchar)auxstate[Num][0]];
         1158                                                         if(i != NONE)
         1159                                                                 outrune(i);
         1160                                                         outchars(".  ");
         1161                                                 } else {
         1162                                                         outchars(auxstate[Num]);
         1163                                                         outchars(".  ");
         1164                                                 }
         1165                                         }
         1166                                 }
         1167                                 break;
         1168                         case Cb:
         1169                         case Db:
         1170                         case Qp:
         1171                         case P:
         1172                                 if(tagstarts)
         1173                                         outnl(1);
         1174                                 break;
         1175                         case Table:
         1176                                 /*
         1177                                  * Todo: gather columns, justify them, etc.
         1178                                  * For now, just let colums come out as rows
         1179                                  */
         1180                                 if(!tagstarts)
         1181                                         outnl(0);
         1182                                 break;
         1183                         case Col:
         1184                                 if(tagstarts)
         1185                                         outnl(0);
         1186                                 break;
         1187                         case Dn:
         1188                                 if(tagstarts)
         1189                                         outchar('/');
         1190                                 break;
         1191                         }
         1192                 }
         1193         }
         1194         if(cmd == 'h') {
         1195                 outinhibit = 0;
         1196                 outnl(0);
         1197         }
         1198 }
         1199 
         1200 /*
         1201  * Return offset into bdict where next oed entry after fromoff starts.
         1202  * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
         1203  */
         1204 long
         1205 oednextoff(long fromoff)
         1206 {
         1207         long a, n;
         1208         int c;
         1209 
         1210         a = Bseek(bdict, fromoff, 0);
         1211         if(a < 0)
         1212                 return -1;
         1213         n = 0;
         1214         for(;;) {
         1215                 c = Bgetc(bdict);
         1216                 if(c < 0)
         1217                         break;
         1218                 if(c == '<') {
         1219                         c = Bgetc(bdict);
         1220                         if(c == 'e') {
         1221                                 c = Bgetc(bdict);
         1222                                 if(c == '>' || c == ' ')
         1223                                         n = 3;
         1224                         } else if(c == 'v' && Bgetc(bdict) == 'e') {
         1225                                 c = Bgetc(bdict);
         1226                                 if(c == '>' || c == ' ')
         1227                                         n = 4;
         1228                         }
         1229                         if(n)
         1230                                 break;
         1231                 }
         1232         }
         1233         return (Boffset(bdict)-n);
         1234 }
         1235 
         1236 static char *prkey1 =
         1237 "KEY TO THE PRONUNCIATION\n"
         1238 "\n"
         1239 "I. CONSONANTS\n"
         1240 "b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
         1241 "\n"
         1242 "g as in go (gəʊ)\n"
         1243 "h  ...  ho! (həʊ)\n"
         1244 "r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
         1245 "(r)...  her (hɜː(r))\n"
         1246 "s  ...  see (siː), success (səkˈsɜs)\n"
         1247 "w  ...  wear (wɛə(r))\n"
         1248 "hw ...  when (hwɛn)\n"
         1249 "j  ...  yes (jɛs)\n"
         1250 "θ  ...  thin (θin), bath (bɑːθ)\n"
         1251 "ð  ...  then (ðɛn), bathe (beɪð)\n"
         1252 "ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
         1253 "tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
         1254 "ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
         1255 ;
         1256 static char *prkey2 =
         1257 "dʒ ...  judge (dʒʌdʒ)\n"
         1258 "ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
         1259 "ŋg ...  finger (ˈfiŋgə(r))\n"
         1260 "\n"
         1261 "Foreign\n"
         1262 "ʎ as in It. seraglio (serˈraʎo)\n"
         1263 "ɲ  ...  Fr. cognac (kɔɲak)\n"
         1264 "x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
         1265 "ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
         1266 "ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
         1267 "c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
         1268 "ɥ  ...  Fr. cuisine (kɥizin)\n"
         1269 "\n"
         1270 ;
         1271 static char *prkey3 =
         1272 "II. VOWELS AND DIPTHONGS\n"
         1273 "\n"
         1274 "Short\n"
         1275 "ɪ as in pit (pɪt), -ness (-nɪs)\n"
         1276 "ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
         1277 "æ  ...  pat (pæt)\n"
         1278 "ʌ  ...  putt (pʌt)\n"
         1279 "ɒ  ...  pot (pɒt)\n"
         1280 "ʊ  ...  put (pʊt)\n"
         1281 "ə  ...  another (əˈnʌðə(r))\n"
         1282 "(ə)...  beaten (ˈbiːt(ə)n)\n"
         1283 "i  ...  Fr. si (si)\n"
         1284 "e  ...  Fr. bébé (bebe)\n"
         1285 "a  ...  Fr. mari (mari)\n"
         1286 "ɑ  ...  Fr. bâtiment (bɑtimã)\n"
         1287 "ɔ  ...  Fr. homme (ɔm)\n"
         1288 "o  ...  Fr. eau (o)\n"
         1289 "ø  ...  Fr. peu (pø)\n"
         1290 ;
/*
 * Pronunciation key, part 4 of 6: the remaining short vowels
 * followed by the long vowels.  Printed by oedprintkey().
 */
static char *prkey4 =
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
;
         1311 static char *prkey5 =
         1312 "Nasal\n"
         1313 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
         1314 "ã  ...  Fr. franc (frã)\n"
         1315 "ɔ˜ ...  Fr. bon (bɔ˜n)\n"
         1316 "œ˜ ...  Fr. un (œ˜)\n"
         1317 "\n"
         1318 "Dipthongs, etc.\n"
         1319 "eɪ as in bay (beɪ)\n"
         1320 "aɪ ...  buy (baɪ)\n"
         1321 "ɔɪ ...  boy (bɔɪ)\n"
         1322 "əʊ ...  no (nəʊ)\n"
         1323 "aʊ ...  now (naʊ)\n"
         1324 "ɪə ...  peer (pɪə(r))\n"
         1325 "ɛə ...  pair (pɛə(r))\n"
         1326 "ʊə ...  tour (tʊə(r))\n"
         1327 "ɔə ...  boar (bɔə(r))\n"
         1328 "\n"
         1329 ;
/*
 * Pronunciation key, part 6 of 6: stress marks and a worked example.
 * Printed last by oedprintkey().
 */
static char *prkey6 =
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
         1338 
         1339 void
         1340 oedprintkey(void)
         1341 {
         1342         Bprint(bout, "%s%s%s%s%s%s",
         1343                 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
         1344 }
         1345 
         1346 /*
         1347  * f points just after a '&', fe points at end of entry.
         1348  * Accumulate the special name, starting after the &
         1349  * and continuing until the next '.', in spec[].
         1350  * Return pointer to char after '.'.
         1351  */
         1352 static char *
         1353 getspec(char *f, char *fe)
         1354 {
         1355         char *t;
         1356         int c, i;
         1357 
         1358         t = spec;
         1359         i = sizeof spec;
         1360         while(--i > 0) {
         1361                 c = *f++;
         1362                 if(c == '.' || f == fe)
         1363                         break;
         1364                 *t++ = c;
         1365         }
         1366         *t = 0;
         1367         return f;
         1368 }
         1369 
         1370 /*
         1371  * f points just after '<'; fe points at end of entry.
         1372  * Expect next characters from bin to match:
         1373  *  [/][^ >]+( [^>=]+=[^ >]+)*>
         1374  *      tag   auxname auxval
         1375  * Accumulate the tag and its auxilliary information in
         1376  * tag[], auxname[][] and auxval[][].
         1377  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
         1378  * Set naux to the number of aux pairs found.
         1379  * Return pointer to after final '>'.
         1380  */
         1381 static char *
         1382 gettag(char *f, char *fe)
         1383 {
         1384         char *t;
         1385         int c, i;
         1386 
         1387         t = tag;
         1388         c = *f++;
         1389         if(c == '/')
         1390                 tagstarts = 0;
         1391         else {
         1392                 tagstarts = 1;
         1393                 *t++ = c;
         1394         }
         1395         i = Buflen;
         1396         naux = 0;
         1397         while(--i > 0) {
         1398                 c = *f++;
         1399                 if(c == '>' || f == fe)
         1400                         break;
         1401                 if(c == ' ') {
         1402                         *t = 0;
         1403                         t = auxname[naux];
         1404                         i = Buflen;
         1405                         if(naux < Maxaux-1)
         1406                                 naux++;
         1407                 } else if(naux && c == '=') {
         1408                         *t = 0;
         1409                         t = auxval[naux-1];
         1410                         i = Buflen;
         1411                 } else
         1412                         *t++ = c;
         1413         }
         1414         *t = 0;
         1415         return f;
         1416 }
         1417 
         1418 static void
         1419 dostatus(void)
         1420 {
         1421         char *s;
         1422 
         1423         s = auxstate[St];
         1424         if(s) {
         1425                 if(strcmp(s, "obs") == 0)
         1426                         outrune(0x2020);
         1427                 else if(strcmp(s, "ali") == 0)
         1428                         outrune(0x2016);
         1429                 else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
         1430                         outrune(0xb6);
         1431                 else if(strcmp(s, "xref") == 0)
         1432                         {/* nothing */}
         1433                 else if(debug)
         1434                         err("status %ld %d %s", curentry.doff, cursize, s);
         1435         }
         1436 }