URI:
       tpgw.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tpgw.c (29812B)
       ---
            1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */
            2 #include <u.h>
            3 #include <libc.h>
            4 #include <bio.h>
            5 #include "dict.h"
            6 
            7 enum {        /* file-local numeric limits */
            8         Buflen=1000,        /* presumably a buffer length; its use is not visible here -- TODO confirm */
            9         Maxaux=5        /* presumably a cap on per-tag auxiliary items; TODO confirm against use */
           10 };
           11 
           12 /* Possible tags; each value is paired with a tag name in tagtab */
           13 enum {
           14         B,                /* Bold */
           15         Blockquote,        /* Block quote */
           16         Br,                /* Break line */
           17         Cd,                /* ? colloquial data */
           18         Col,                /* ? Colloquial */
           19         Def,                /* Definition */
           20         Hw,                 /* Head Word */
           21         I,                /* Italics */
           22         P,                /* Paragraph */
           23         Pos,                /* Part of Speech */
           24         Sn,                /* Sense */
           25         U,                /* ? cross reference */
           26         Wf,                /* ? word form */
           27         Ntag                /* end of tags; equals the number of tags listed above */
           28 };
           29 
           30 /* Assoc tables must be sorted on first field (the name string) */
           31 
           32 static Assoc tagtab[] = {        /* tag name -> tag enum value */
           33         {"b",                        B},
           34         {"blockquote",        Blockquote},
           35         {"BR",                Br},        /* NOTE(review): "BR" precedes "b" in byte-wise (strcmp) order; confirm the lookup routine, not visible here, tolerates this position */
           36         {"cd",                Cd},
           37         {"col",                Col},
           38         {"def",                Def},
           39         {"hw",                Hw},
           40         {"i",                        I},
           41         {"p",                        P},
           42         {"pos",                Pos},
           43         {"sn",                Sn},
           44         {"u",                        U},
           45         {"wf",                Wf}
           46 };
           47 
           48 /* Possible tag auxiliary info */
           49 enum {
           50         Cols,                /* number of columns in a table */
           51         Num,                /* letter or number, for a sense */
           52         St,                /* status (e.g., obs) */
           53         Naux                /* end of aux kinds; equals the number of kinds listed above */
           54 };
           55 
           56 #if 0        /* auxtab is compiled out by this conditional */
           57 static Assoc auxtab[] = {        /* aux name -> aux enum value */
           58         {"cols",        Cols},
           59         {"num",                Num},
           60         {"st",                St}
           61 };
           62 #endif
           63 
           64 static Assoc spectab[] = {
           65         {"3on4",        0xbe},
           66         {"AElig",                0xc6},
           67         {"Aacute",        0xc1},
           68         {"Aang",        0xc5},
           69         {"Abarab",        0x100},
           70         {"Acirc",        0xc2},
           71         {"Agrave",        0xc0},
           72         {"Alpha",        0x391},
           73         {"Amacr",        0x100},
           74         {"Asg",                0x1b7},                /* Unicyle. Cf "Sake" */
           75         {"Auml",        0xc4},
           76         {"Beta",        0x392},
           77         {"Cced",        0xc7},
           78         {"Chacek",        0x10c},
           79         {"Chi",                0x3a7},
           80         {"Chirho",        0x2627},                /* Chi Rho U+2627 */
           81         {"Csigma",        0x3da},
           82         {"Delta",        0x394},
           83         {"Eacute",        0xc9},
           84         {"Ecirc",        0xca},
           85         {"Edh",                0xd0},
           86         {"Epsilon",        0x395},
           87         {"Eta",                0x397},
           88         {"Gamma",        0x393},
           89         {"Iacute",        0xcd},
           90         {"Icirc",        0xce},
           91         {"Imacr",        0x12a},
           92         {"Integ",        0x222b},
           93         {"Iota",        0x399},
           94         {"Kappa",        0x39a},
           95         {"Koppa",        0x3de},
           96         {"Lambda",        0x39b},
           97         {"Lbar",        0x141},
           98         {"Mu",                0x39c},
           99         {"Naira",        0x4e},                /* should have bar through */
          100         {"Nplus",        0x4e},                /* should have plus above */
          101         {"Ntilde",        0xd1},
          102         {"Nu",                0x39d},
          103         {"Oacute",        0xd3},
          104         {"Obar",        0xd8},
          105         {"Ocirc",        0xd4},
          106         {"Oe",                0x152},
          107         {"Omega",        0x3a9},
          108         {"Omicron",        0x39f},
          109         {"Ouml",        0xd6},
          110         {"Phi",                0x3a6},
          111         {"Pi",                0x3a0},
          112         {"Psi",                0x3a8},
          113         {"Rho",                0x3a1},
          114         {"Sacute",        0x15a},
          115         {"Sigma",        0x3a3},
          116         {"Summ",        0x2211},
          117         {"Tau",                0x3a4},
          118         {"Th",                0xde},
          119         {"Theta",        0x398},
          120         {"Tse",                0x426},
          121         {"Uacute",        0xda},
          122         {"Ucirc",        0xdb},
          123         {"Upsilon",        0x3a5},
          124         {"Uuml",        0xdc},
          125         {"Wyn",                0x1bf},                /* wynn U+01BF */
          126         {"Xi",                0x39e},
          127         {"Ygh",                0x1b7},                /* Yogh        U+01B7 */
          128         {"Zeta",        0x396},
          129         {"Zh",                0x1b7},                /* looks like Yogh. Cf "Sake" */
          130         {"a",                0x61},                /* ante */
          131         {"aacute",        0xe1},
          132         {"aang",        0xe5},
          133         {"aasper",        MAAS},
          134         {"abreve",        0x103},
          135         {"acirc",        0xe2},
          136         {"acute",                LACU},
          137         {"aelig",                0xe6},
          138         {"agrave",        0xe0},
          139         {"ahook",        0x105},
          140         {"alenis",        MALN},
          141         {"alpha",        0x3b1},
          142         {"amacr",        0x101},
          143         {"amp",                0x26},
          144         {"and",                MAND},
          145         {"ang",                LRNG},
          146         {"angle",        0x2220},
          147         {"ankh",        0x2625},                /* ankh U+2625 */
          148         {"ante",        0x61},                /* before (year) */
          149         {"aonq",        MAOQ},
          150         {"appreq",        0x2243},
          151         {"aquar",        0x2652},
          152         {"arDadfull",        0x636},                /* Dad U+0636 */
          153         {"arHa",        0x62d},                /* haa U+062D */
          154         {"arTa",        0x62a},                /* taa U+062A */
          155         {"arain",        0x639},                /* ain U+0639 */
          156         {"arainfull",        0x639},                /* ain U+0639 */
          157         {"aralif",        0x627},                /* alef U+0627 */
          158         {"arba",        0x628},                /* baa U+0628 */
          159         {"arha",        0x647},                /* ha U+0647 */
          160         {"aries",        0x2648},
          161         {"arnun",        0x646},                /* noon U+0646 */
          162         {"arnunfull",        0x646},                /* noon U+0646 */
          163         {"arpa",        0x647},                /* ha U+0647 */
          164         {"arqoph",        0x642},                /* qaf U+0642 */
          165         {"arshinfull",        0x634},                /* sheen U+0634 */
          166         {"arta",        0x62a},                /* taa U+062A */
          167         {"artafull",        0x62a},                /* taa U+062A */
          168         {"artha",        0x62b},                /* thaa U+062B */
          169         {"arwaw",        0x648},                /* waw U+0648 */
          170         {"arya",        0x64a},                /* ya U+064A */
          171         {"aryafull",        0x64a},                /* ya U+064A */
          172         {"arzero",        0x660},                /* indic zero U+0660 */
          173         {"asg",                0x292},                /* unicycle character. Cf "hallow" */
          174         {"asper",        LASP},
          175         {"assert",        0x22a2},
          176         {"astm",        0x2042},                /* asterism: should be upside down */
          177         {"at",                0x40},
          178         {"atilde",        0xe3},
          179         {"auml",        0xe4},
          180         {"ayin",        0x639},                /* arabic ain U+0639 */
          181         {"b1",                0x2d},                /* single bond */
          182         {"b2",                0x3d},                /* double bond */
          183         {"b3",                0x2261},                /* triple bond */
          184         {"bbar",        0x180},                /* b with bar U+0180 */
          185         {"beta",        0x3b2},
          186         {"bigobl",        0x2f},
          187         {"blC",                0x43},                /* should be black letter */
          188         {"blJ",                0x4a},                /* should be black letter */
          189         {"blU",                0x55},                /* should be black letter */
          190         {"blb",                0x62},                /* should be black letter */
          191         {"blozenge",        0x25ca},                /* U+25CA; should be black */
          192         {"bly",                0x79},                /* should be black letter */
          193         {"bra",                MBRA},
          194         {"brbl",        LBRB},
          195         {"breve",        LBRV},
          196         {"bslash",'\\'},
          197         {"bsquare",        0x25a0},                /* black square U+25A0 */
          198         {"btril",        0x25c0},                /* U+25C0 */
          199         {"btrir",        0x25b6},                /* U+25B6 */
          200         {"c",                0x63},                /* circa */
          201         {"cab",                0x232a},
          202         {"cacute",        0x107},
          203         {"canc",        0x264b},
          204         {"capr",        0x2651},
          205         {"caret",        0x5e},
          206         {"cb",                0x7d},
          207         {"cbigb",        0x7d},
          208         {"cbigpren",        0x29},
          209         {"cbigsb",        0x5d},
          210         {"cced",        0xe7},
          211         {"cdil",        LCED},
          212         {"cdsb",        0x301b},                /* ]] U+301b */
          213         {"cent",        0xa2},
          214         {"chacek",        0x10d},
          215         {"chi",                0x3c7},
          216         {"circ",        LRNG},
          217         {"circa",        0x63},                /* about (year) */
          218         {"circbl",        0x325},                /* ring below accent U+0325 */
          219         {"circle",        0x25cb},                /* U+25CB */
          220         {"circledot",        0x2299},
          221         {"click",        0x296},
          222         {"club",        0x2663},
          223         {"comtime",        0x43},
          224         {"conj",        0x260c},
          225         {"cprt",        0xa9},
          226         {"cq",                '\''},
          227         {"cqq",                0x201d},
          228         {"cross",        0x2720},                /* maltese cross U+2720 */
          229         {"crotchet",        0x2669},
          230         {"csb",                0x5d},
          231         {"ctilde",        0x63},                /* +tilde */
          232         {"ctlig",        MLCT},
          233         {"cyra",        0x430},
          234         {"cyre",        0x435},
          235         {"cyrhard",        0x44a},
          236         {"cyrjat",        0x463},
          237         {"cyrm",        0x43c},
          238         {"cyrn",        0x43d},
          239         {"cyrr",        0x440},
          240         {"cyrsoft",        0x44c},
          241         {"cyrt",        0x442},
          242         {"cyry",        0x44b},
          243         {"dag",                0x2020},
          244         {"dbar",        0x111},
          245         {"dblar",        0x21cb},
          246         {"dblgt",        0x226b},
          247         {"dbllt",        0x226a},
          248         {"dced",        0x64},                /* +cedilla */
          249         {"dd",                MDD},
          250         {"ddag",        0x2021},
          251         {"ddd",                MDDD},
          252         {"decr",        0x2193},
          253         {"deg",                0xb0},
          254         {"dele",        0x64},                /* should be dele */
          255         {"delta",        0x3b4},
          256         {"descnode",        0x260b},                /* descending node U+260B */
          257         {"diamond",        0x2662},
          258         {"digamma",        0x3dd},
          259         {"div",                0xf7},
          260         {"dlessi",        0x131},
          261         {"dlessj1",        0x6a},                /* should be dotless */
          262         {"dlessj2",        0x6a},                /* should be dotless */
          263         {"dlessj3",        0x6a},                /* should be dotless */
          264         {"dollar",        0x24},
          265         {"dotab",        LDOT},
          266         {"dotbl",        LDTB},
          267         {"drachm",        0x292},
          268         {"dubh",        0x2d},
          269         {"eacute",        0xe9},
          270         {"earth",        0x2641},
          271         {"easper",        MEAS},
          272         {"ebreve",        0x115},
          273         {"ecirc",        0xea},
          274         {"edh",                0xf0},
          275         {"egrave",        0xe8},
          276         {"ehacek",        0x11b},
          277         {"ehook",        0x119},
          278         {"elem",        0x220a},
          279         {"elenis",        MELN},
          280         {"em",                0x2014},
          281         {"emacr",        0x113},
          282         {"emem",        MEMM},
          283         {"en",                0x2013},
          284         {"epsilon",        0x3b5},
          285         {"equil",        0x21cb},
          286         {"ergo",        0x2234},
          287         {"es",                MES},
          288         {"eszett",        0xdf},
          289         {"eta",                0x3b7},
          290         {"eth",                0xf0},
          291         {"euml",        0xeb},
          292         {"expon",        0x2191},
          293         {"fact",        0x21},
          294         {"fata",        0x251},
          295         {"fatpara",        0xb6},                /* should have fatter, filled in bowl */
          296         {"female",        0x2640},
          297         {"ffilig",        MLFFI},
          298         {"fflig",        MLFF},
          299         {"ffllig",        MLFFL},
          300         {"filig",        MLFI},
          301         {"flat",        0x266d},
          302         {"fllig",        MLFL},
          303         {"frE",                0x45},                /* should be curly */
          304         {"frL",        'L'},                /* should be curly */
          305         {"frR",                0x52},                /* should be curly */
          306         {"frakB",        0x42},                /* should have fraktur style */
          307         {"frakG",        0x47},
          308         {"frakH",        0x48},
          309         {"frakI",        0x49},
          310         {"frakM",        0x4d},
          311         {"frakU",        0x55},
          312         {"frakX",        0x58},
          313         {"frakY",        0x59},
          314         {"frakh",        0x68},
          315         {"frbl",        LFRB},
          316         {"frown",        LFRN},
          317         {"fs",                0x20},
          318         {"fsigma",        0x3c2},
          319         {"gAacute",        0xc1},                /* should be Α+acute */
          320         {"gaacute",        0x3b1},                /* +acute */
          321         {"gabreve",        0x3b1},                /* +breve */
          322         {"gafrown",        0x3b1},                /* +frown */
          323         {"gagrave",        0x3b1},                /* +grave */
          324         {"gamacr",        0x3b1},                /* +macron */
          325         {"gamma",        0x3b3},
          326         {"gauml",        0x3b1},                /* +umlaut */
          327         {"ge",                0x2267},
          328         {"geacute",        0x3b5},                /* +acute */
          329         {"gegrave",        0x3b5},                /* +grave */
          330         {"ghacute",        0x3b7},                /* +acute */
          331         {"ghfrown",        0x3b7},                /* +frown */
          332         {"ghgrave",        0x3b7},                /* +grave */
          333         {"ghmacr",        0x3b7},                /* +macron */
          334         {"giacute",        0x3b9},                /* +acute */
          335         {"gibreve",        0x3b9},                /* +breve */
          336         {"gifrown",        0x3b9},                /* +frown */
          337         {"gigrave",        0x3b9},                /* +grave */
          338         {"gimacr",        0x3b9},                /* +macron */
          339         {"giuml",        0x3b9},                /* +umlaut */
          340         {"glagjat",        0x467},
          341         {"glots",        0x2c0},
          342         {"goacute",        0x3bf},                /* +acute */
          343         {"gobreve",        0x3bf},                /* +breve */
          344         {"grave",        LGRV},
          345         {"gt",                0x3e},
          346         {"guacute",        0x3c5},                /* +acute */
          347         {"gufrown",        0x3c5},                /* +frown */
          348         {"gugrave",        0x3c5},                /* +grave */
          349         {"gumacr",        0x3c5},                /* +macron */
          350         {"guuml",        0x3c5},                /* +umlaut */
          351         {"gwacute",        0x3c9},                /* +acute */
          352         {"gwfrown",        0x3c9},                /* +frown */
          353         {"gwgrave",        0x3c9},                /* +grave */
          354         {"hacek",        LHCK},
          355         {"halft",        0x2308},
          356         {"hash",        0x23},
          357         {"hasper",        MHAS},
          358         {"hatpath",        0x5b2},                /* hataf patah U+05B2 */
          359         {"hatqam",        0x5b3},                /* hataf qamats U+05B3 */
          360         {"hatseg",        0x5b1},                /* hataf segol U+05B1 */
          361         {"hbar",        0x127},
          362         {"heart",        0x2661},
          363         {"hebaleph",        0x5d0},                /* aleph U+05D0 */
          364         {"hebayin",        0x5e2},                /* ayin U+05E2 */
          365         {"hebbet",        0x5d1},                /* bet U+05D1 */
          366         {"hebbeth",        0x5d1},                /* bet U+05D1 */
          367         {"hebcheth",        0x5d7},                /* bet U+05D7 */
          368         {"hebdaleth",        0x5d3},                /* dalet U+05D3 */
          369         {"hebgimel",        0x5d2},                /* gimel U+05D2 */
          370         {"hebhe",        0x5d4},                /* he U+05D4 */
          371         {"hebkaph",        0x5db},                /* kaf U+05DB */
          372         {"heblamed",        0x5dc},                /* lamed U+05DC */
          373         {"hebmem",        0x5de},                /* mem U+05DE */
          374         {"hebnun",        0x5e0},                /* nun U+05E0 */
          375         {"hebnunfin",        0x5df},                /* final nun U+05DF */
          376         {"hebpe",        0x5e4},                /* pe U+05E4 */
          377         {"hebpedag",        0x5e3},                /* final pe? U+05E3 */
          378         {"hebqoph",        0x5e7},                /* qof U+05E7 */
          379         {"hebresh",        0x5e8},                /* resh U+05E8 */
          380         {"hebshin",        0x5e9},                /* shin U+05E9 */
          381         {"hebtav",        0x5ea},                /* tav U+05EA */
          382         {"hebtsade",        0x5e6},                /* tsadi U+05E6 */
          383         {"hebwaw",        0x5d5},                /* vav? U+05D5 */
          384         {"hebyod",        0x5d9},                /* yod U+05D9 */
          385         {"hebzayin",        0x5d6},                /* zayin U+05D6 */
          386         {"hgz",                0x292},                /* ??? Cf "alet" */
          387         {"hireq",        0x5b4},                /* U+05B4 */
          388         {"hlenis",        MHLN},
          389         {"hook",        LOGO},
          390         {"horizE",        0x45},                /* should be on side */
          391         {"horizP",        0x50},                /* should be on side */
          392         {"horizS",        0x223d},
          393         {"horizT",        0x22a3},
          394         {"horizb",        0x7b},                /* should be underbrace */
          395         {"ia",                0x3b1},
          396         {"iacute",        0xed},
          397         {"iasper",        MIAS},
          398         {"ib",                0x3b2},
          399         {"ibar",        0x268},
          400         {"ibreve",        0x12d},
          401         {"icirc",        0xee},
          402         {"id",                0x3b4},
          403         {"ident",        0x2261},
          404         {"ie",                0x3b5},
          405         {"ifilig",        MLFI},
          406         {"ifflig",        MLFF},
          407         {"ig",                0x3b3},
          408         {"igrave",        0xec},
          409         {"ih",                0x3b7},
          410         {"ii",                0x3b9},
          411         {"ik",                0x3ba},
          412         {"ilenis",        MILN},
          413         {"imacr",        0x12b},
          414         {"implies",        0x21d2},
          415         {"index",        0x261e},
          416         {"infin",        0x221e},
          417         {"integ",        0x222b},
          418         {"intsec",        0x2229},
          419         {"invpri",        0x2cf},
          420         {"iota",        0x3b9},
          421         {"iq",                0x3c8},
          422         {"istlig",        MLST},
          423         {"isub",        0x3f5},                /* iota below accent */
          424         {"iuml",        0xef},
          425         {"iz",                0x3b6},
          426         {"jup",                0x2643},
          427         {"kappa",        0x3ba},
          428         {"koppa",        0x3df},
          429         {"lambda",        0x3bb},
          430         {"lar",                0x2190},
          431         {"lbar",        0x142},
          432         {"le",                0x2266},
          433         {"lenis",        LLEN},
          434         {"leo",                0x264c},
          435         {"lhalfbr",        0x2308},
          436         {"lhshoe",        0x2283},
          437         {"libra",        0x264e},
          438         {"llswing",        MLLS},
          439         {"lm",                0x2d0},
          440         {"logicand",        0x2227},
          441         {"logicor",        0x2228},
          442         {"longs",        0x283},
          443         {"lrar",        0x2194},
          444         {"lt",                0x3c},
          445         {"ltappr",        0x227e},
          446         {"ltflat",        0x2220},
          447         {"lumlbl",        0x6c},                /* +umlaut below */
          448         {"mac",                LMAC},
          449         {"male",        0x2642},
          450         {"mc",                0x63},                /* should be raised */
          451         {"merc",        0x263f},                /* mercury U+263F */
          452         {"min",                0x2212},
          453         {"moonfq",        0x263d},                /* first quarter moon U+263D */
          454         {"moonlq",        0x263e},                /* last quarter moon U+263E */
          455         {"msylab",        0x6d},                /* +sylab (ˌ) */
          456         {"mu",                0x3bc},
          457         {"nacute",        0x144},
          458         {"natural",        0x266e},
          459         {"neq",                0x2260},
          460         {"nfacute",        0x2032},
          461         {"nfasper",        0x2bd},
          462         {"nfbreve",        0x2d8},
          463         {"nfced",        0xb8},
          464         {"nfcirc",        0x2c6},
          465         {"nffrown",        0x2322},
          466         {"nfgra",        0x2cb},
          467         {"nfhacek",        0x2c7},
          468         {"nfmac",        0xaf},
          469         {"nftilde",        0x2dc},
          470         {"nfuml",        0xa8},
          471         {"ng",                0x14b},
          472         {"not",                0xac},
          473         {"notelem",        0x2209},
          474         {"ntilde",        0xf1},
          475         {"nu",                0x3bd},
          476         {"oab",                0x2329},
          477         {"oacute",        0xf3},
          478         {"oasper",        MOAS},
          479         {"ob",                0x7b},
          480         {"obar",        0xf8},
          481         {"obigb",        0x7b},                /* should be big */
          482         {"obigpren",        0x28},
          483         {"obigsb",        0x5b},                /* should be big */
          484         {"obreve",        0x14f},
          485         {"ocirc",        0xf4},
          486         {"odsb",        0x301a},                /* [[ U+301A */
          487         {"oelig",                0x153},
          488         {"oeamp",        0x26},
          489         {"ograve",        0xf2},
          490         {"ohook",        0x6f},                /* +hook */
          491         {"olenis",        MOLN},
          492         {"omacr",        0x14d},
          493         {"omega",        0x3c9},
          494         {"omicron",        0x3bf},
          495         {"ope",                0x25b},
          496         {"opp",                0x260d},
          497         {"oq",                0x60},
          498         {"oqq",                0x201c},
          499         {"or",                MOR},
          500         {"osb",                0x5b},
          501         {"otilde",        0xf5},
          502         {"ouml",        0xf6},
          503         {"ounce",        0x2125},                /* ounce U+2125 */
          504         {"ovparen",        0x2322},                /* should be sideways ( */
          505         {"p",                0x2032},
          506         {"pa",                0x2202},
          507         {"page",        0x50},
          508         {"pall",        0x28e},
          509         {"paln",        0x272},
          510         {"par",                PAR},
          511         {"para",        0xb6},
          512         {"pbar",        0x70},                /* +bar */
          513         {"per",                0x2118},                /* per U+2118 */
          514         {"phi",                0x3c6},
          515         {"phi2",        0x3d5},
          516         {"pi",                0x3c0},
          517         {"pisces",        0x2653},
          518         {"planck",        0x127},
          519         {"plantinJ",        0x4a},                /* should be script */
          520         {"pm",                0xb1},
          521         {"pmil",        0x2030},
          522         {"pp",                0x2033},
          523         {"ppp",                0x2034},
          524         {"prop",        0x221d},
          525         {"psi",                0x3c8},
          526         {"pstlg",        0xa3},
          527         {"q",                0x3f},                /* should be raised */
          528         {"qamets",        0x5b3},                /* U+05B3 */
          529         {"quaver",        0x266a},
          530         {"rar",                0x2192},
          531         {"rasper",        MRAS},
          532         {"rdot",        0xb7},
          533         {"recipe",        0x211e},                /* U+211E */
          534         {"reg",                0xae},
          535         {"revC",        0x186},                /* open O U+0186 */
          536         {"reva",        0x252},
          537         {"revc",        0x254},
          538         {"revope",        0x25c},
          539         {"revr",        0x279},
          540         {"revsc",        0x2d2},                /* upside-down semicolon */
          541         {"revv",        0x28c},
          542         {"rfa",                0x6f},                /* +hook (Cf "goal") */
          543         {"rhacek",        0x159},
          544         {"rhalfbr",        0x2309},
          545         {"rho",                0x3c1},
          546         {"rhshoe",        0x2282},
          547         {"rlenis",        MRLN},
          548         {"rsylab",        0x72},                /* +sylab */
          549         {"runash",        0x46},                /* should be runic 'ash' */
          550         {"rvow",        0x2d4},
          551         {"sacute",        0x15b},
          552         {"sagit",        0x2650},
          553         {"sampi",        0x3e1},
          554         {"saturn",        0x2644},
          555         {"sced",        0x15f},
          556         {"schwa",        0x259},
          557         {"scorpio",        0x264f},
          558         {"scrA",        0x41},                /* should be script */
          559         {"scrC",        0x43},
          560         {"scrE",        0x45},
          561         {"scrF",        0x46},
          562         {"scrI",        0x49},
          563         {"scrJ",        0x4a},
          564         {"scrL",'L'},
          565         {"scrO",        0x4f},
          566         {"scrP",        0x50},
          567         {"scrQ",        0x51},
          568         {"scrS",        0x53},
          569         {"scrT",        0x54},
          570         {"scrb",        0x62},
          571         {"scrd",        0x64},
          572         {"scrh",        0x68},
          573         {"scrl",        0x6c},
          574         {"scruple",        0x2108},                /* U+2108 */
          575         {"sdd",                0x2d0},
          576         {"sect",        0xa7},
          577         {"semE",        0x2203},
          578         {"sh",                0x283},
          579         {"shacek",        0x161},
          580         {"sharp",        0x266f},
          581         {"sheva",        0x5b0},                /* U+05B0 */
          582         {"shti",        0x26a},
          583         {"shtsyll",        0x222a},
          584         {"shtu",        0x28a},
          585         {"sidetri",        0x22b2},
          586         {"sigma",        0x3c3},
          587         {"since",        0x2235},
          588         {"slge",        0x2265},                /* should have slanted line under */
          589         {"slle",        0x2264},                /* should have slanted line under */
          590         {"sm",                0x2c8},
          591         {"smm",                0x2cc},
          592         {"spade",        0x2660},
          593         {"sqrt",        0x221a},
          594         {"square",        0x25a1},                /* U+25A1 */
          595         {"ssChi",        0x3a7},                /* should be sans serif */
          596         {"ssIota",        0x399},
          597         {"ssOmicron",        0x39f},
          598         {"ssPi",        0x3a0},
          599         {"ssRho",        0x3a1},
          600         {"ssSigma",        0x3a3},
          601         {"ssTau",        0x3a4},
          602         {"star",        0x2a},
          603         {"stlig",        MLST},
          604         {"sup2",        0x2072},
          605         {"supgt",        0x2c3},
          606         {"suplt",        0x2c2},
          607         {"sur",                0x2b3},
          608         {"swing",        0x223c},
          609         {"tau",                0x3c4},
          610         {"taur",        0x2649},
          611         {"th",                0xfe},
          612         {"thbar",        0xfe},                /* +bar */
          613         {"theta",        0x3b8},
          614         {"thinqm",        0x3f},                /* should be thinner */
          615         {"tilde",        LTIL},
          616         {"times",        0xd7},
          617         {"tri",                0x2206},
          618         {"trli",        0x2016},
          619         {"ts",                0x2009},
          620         {"uacute",        0xfa},
          621         {"uasper",        MUAS},
          622         {"ubar",        0x75},                /* +bar */
          623         {"ubreve",        0x16d},
          624         {"ucirc",        0xfb},
          625         {"udA",                0x2200},
          626         {"udT",                0x22a5},
          627         {"uda",                0x250},
          628         {"udh",                0x265},
          629         {"udqm",        0xbf},
          630         {"udpsi",        0x22d4},
          631         {"udtr",        0x2207},
          632         {"ugrave",        0xf9},
          633         {"ulenis",        MULN},
          634         {"umacr",        0x16b},
          635         {"uml",                LUML},
          636         {"undl",        0x2cd},                /* underline accent */
          637         {"union",        0x222a},
          638         {"upsilon",        0x3c5},
          639         {"uuml",        0xfc},
          640         {"vavpath",        0x5d5},                /* vav U+05D5 (+patah) */
          641         {"vavsheva",        0x5d5},                /* vav U+05D5 (+sheva) */
          642         {"vb",                0x7c},
          643         {"vddd",        0x22ee},
          644         {"versicle2",        0x2123},                /* U+2123 */
          645         {"vinc",        0xaf},
          646         {"virgo",        0x264d},
          647         {"vpal",        0x25f},
          648         {"vvf",                0x263},
          649         {"wasper",        MWAS},
          650         {"wavyeq",        0x2248},
          651         {"wlenis",        MWLN},
          652         {"wyn",                0x1bf},                /* wynn U+01BF */
          653         {"xi",                0x3be},
          654         {"yacute",        0xfd},
          655         {"ycirc",        0x177},
          656         {"ygh",                0x292},
          657         {"ymacr",        0x79},                /* +macron */
          658         {"yuml",        0xff},
          659         {"zced",        0x7a},                /* +cedilla */
          660         {"zeta",        0x3b6},
          661         {"zh",                0x292},
          662         {"zhacek",        0x17e}
          663 };
          664 /*
          665    The following special characters don't have close enough
          666    equivalents in Unicode, so aren't in the above table.
          667         22n                2^(2^n) Cf Fermat
          668         2on4                2/4
          669         3on8                3/8
          670         Bantuo                Bantu O. Cf Otshi-herero
          671         Car                C with circular arrow on top
          672         albrtime         cut-time: C with vertical line
          673         ardal                Cf dental
          674         bantuo                Bantu o. Cf Otshi-herero
          675         bbc1                single chem bond below
          676         bbc2                double chem bond below
          677         bbl1                chem bond like /
          678         bbl2                chem bond like //
          679         bbr1                chem bond like \
          680         bbr2                chem bond \\
          681         bcop1                copper symbol. Cf copper
          682         bcop2                copper symbol. Cf copper
          683         benchm                Cf benchmark
          684         btc1                single chem bond above
          685         btc2                double chem bond above
          686         btl1                chem bond like \
          687         btl2                chem bond like \\
          688         btr1                chem bond like /
          689         btr2                chem bond like //
          690         burman                Cf Burman
          691         devph                sanskrit letter. Cf ph
          692         devrfls                sanskrit letter. Cf cerebral
          693         duplong[12]        musical note
          694         egchi                early form of chi
          695         eggamma[12]        early form of gamma
          696         egiota                early form of iota
          697         egkappa                early form of kappa
          698         eglambda        early form of lambda
          699         egmu[12]        early form of mu
          700         egnu[12]        early form of nu
          701         egpi[123]        early form of pi
          702         egrho[12]        early form of rho
          703         egsampi                early form of sampi
          704         egsan                early form of san
          705         egsigma[12]        early form of sigma
          706         egxi[123]        early form of xi
          707         elatS                early form of S
          708         elatc[12]        early form of C
          709         elatg[12]        early form of G
          710         glagjeri        Slavonic Glagolitic jeri
          711         glagjeru        Slavonic Glagolitic jeru
          712         hypolem                hypolemisk (line with underdot)
          713         lhrbr                lower half }
          714         longmord        long mordent
          715         mbwvow                backwards stretched C. Cf retract.
          716         mord                music symbol.  Cf mordent
          717         mostra                Cf direct
          718         ohgcirc                old form of circumflex
          719         oldbeta                old form of β. Cf perturbate
          720         oldsemibr[12]        old forms of semibreve. Cf prolation
          721         ormg                old form of g. Cf G
          722         para[12345]        form of ¶
          723         pauseo                musical pause sign
          724         pauseu                musical pause sign
          725         pharyng                Cf pharyngal
          726         ragr                Black letter ragged r
          727         repetn                musical repeat. Cf retort
          728         segno                musical segno sign
          729         semain[12]        semitic ain
          730         semhe                semitic he
          731         semheth                semitic heth
          732         semkaph                semitic kaph
          733         semlamed[12]        semitic lamed
          734         semmem                semitic mem
          735         semnum                semitic nun
          736         sempe                semitic pe
          737         semqoph[123]        semitic qoph
          738         semresh                semitic resh
          739         semtav[1234]        semitic tav
          740         semyod                semitic yod
          741         semzayin[123]        semitic zayin
          742         shtlong[12]        U with underbar. Cf glyconic
          743         sigmatau        σ,τ combination
          744         squaver                sixteenth note
          745         sqbreve                square musical breve note
          746         swast                swastika
          747         uhrbr                upper half of big }
          748         versicle1                Cf versicle
          749  */
          750 
          751 
          752 static Rune normtab[128] = {        /* default translation table of pgwprintentry, indexed by (byte & 0x7F): newline (0x0a) becomes ' ', '&' '<' '>' become SPCS TAGS TAGE, other control bytes and 0x7f become NONE */
          753         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          754 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          755         NONE,        NONE,        ' ',        NONE,        NONE,        NONE,        NONE,        NONE,
          756 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          757         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          758 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',
          759         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          760 /*30*/  0x30,        0x31,        0x32,        0x33,        0x34,        0x35,        0x36,        0x37,
          761         0x38,        0x39,        0x3a,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,
          762 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,
          763         0x48,        0x49,        0x4a,        0x4b,'L',        0x4d,        0x4e,        0x4f,
          764 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          765         0x58,        0x59,        0x5a,        0x5b,'\\',        0x5d,        0x5e,        0x5f,
          766 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          767         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          768 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          769         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE
          770 };
          771 #if 0
          772 static Rune phtab[128] = {        /* inside the #if 0 region, so not compiled; same layout as normtab but some bytes map to other code points (e.g. '@' -> 0x259, 'S' -> 0x283), and '&' maps to 0xe6 instead of SPCS */
          773         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          774 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          775         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          776 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          777         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          778 /*20*/        0x20,        0x21,        0x2c8,        0x23,        0x24,        0x2cc,        0xe6,        '\'',
          779         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          780 /*30*/  0x30,        0x31,        0x32,        0x25c,        0x34,        0x35,        0x36,        0x37,
          781         0x38,        0xf8,        0x2d0,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,
          782 /*40*/  0x259,        0x251,        0x42,        0x43,        0xf0,        0x25b,        0x46,        0x47,
          783         0x48,        0x26a,        0x4a,        0x4b,'L',        0x4d,        0x14b,        0x254,
          784 /*50*/        0x50,        0x252,        0x52,        0x283,        0x3b8,        0x28a,        0x28c,        0x57,
          785         0x58,        0x59,        0x292,        0x5b,'\\',        0x5d,        0x5e,        0x5f,
          786 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          787         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          788 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          789         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE
          790 };
          791 static Rune grtab[128] = {        /* inside the #if 0 region, so not compiled; same layout as normtab but most ASCII letters map to code points in the 0x391-0x3c9 range (e.g. 'A' -> 0x391, 'a' -> 0x3b1) */
          792         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          793 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          794         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          795 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          796         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          797 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',
          798         0x28,        0x29,        0x2a,        0x2b,        0x2c,        0x2d,        0x2e,        0x2f,
          799 /*30*/  0x30,        0x31,        0x32,        0x33,        0x34,        0x35,        0x36,        0x37,
          800         0x38,        0x39,        0x3a,        0x3b,        TAGS,        0x3d,        TAGE,        0x3f,
          801 /*40*/  0x40,        0x391,        0x392,        0x39e,        0x394,        0x395,        0x3a6,        0x393,
          802         0x397,        0x399,        0x3da,        0x39a,        0x39b,        0x39c,        0x39d,        0x39f,
          803 /*50*/        0x3a0,        0x398,        0x3a1,        0x3a3,        0x3a4,        0x3a5,        0x56,        0x3a9,
          804         0x3a7,        0x3a8,        0x396,        0x5b,'\\',        0x5d,        0x5e,        0x5f,
          805 /*60*/        0x60,        0x3b1,        0x3b2,        0x3be,        0x3b4,        0x3b5,        0x3c6,        0x3b3,
          806         0x3b7,        0x3b9,        0x3c2,        0x3ba,        0x3bb,        0x3bc,        0x3bd,        0x3bf,
          807 /*70*/        0x3c0,        0x3b8,        0x3c1,        0x3c3,        0x3c4,        0x3c5,        0x76,        0x3c9,
          808         0x3c7,        0x3c8,        0x3b6,        0x7b,        0x7c,        0x7d,        0x7e,        NONE
          809 };
          810 static Rune subtab[128] = {        /* inside the #if 0 region, so not compiled; same as normtab except '0'-'9' -> 0x2080-0x2089 and '+' '-' '=' '(' ')' -> 0x208a 0x208b 0x208c 0x208d 0x208e */
          811         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          812 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          813         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          814 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          815         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          816 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',
          817         0x208d,        0x208e,        0x2a,        0x208a,        0x2c,        0x208b,        0x2e,        0x2f,
          818 /*30*/  0x2080,        0x2081,        0x2082,        0x2083,        0x2084,        0x2085,        0x2086,        0x2087,
          819         0x2088,        0x2089,        0x3a,        0x3b,        TAGS,        0x208c,        TAGE,        0x3f,
          820 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,
          821         0x48,        0x49,        0x4a,        0x4b,'L',        0x4d,        0x4e,        0x4f,
          822 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          823         0x58,        0x59,        0x5a,        0x5b,'\\',        0x5d,        0x5e,        0x5f,
          824 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          825         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          826 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          827         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE
          828 };
          829 static Rune suptab[128] = {        /* inside the #if 0 region, so not compiled; same as normtab except '0'-'9' -> 0x2070-0x2079 and '+' '-' '=' '(' ')' -> 0x207a 0x207b 0x207c 0x207d 0x207e. NOTE(review): Unicode superscript 1, 2, 3 are U+00B9, U+00B2, U+00B3, not U+2071..U+2073 -- confirm before enabling */
          830         /*0*/        /*1*/        /*2*/        /*3*/        /*4*/        /*5*/        /*6*/        /*7*/
          831 /*00*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          832         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          833 /*10*/        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          834         NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,        NONE,
          835 /*20*/        0x20,        0x21,        0x22,        0x23,        0x24,        0x25,        SPCS,        '\'',
          836         0x207d,        0x207e,        0x2a,        0x207a,        0x2c,        0x207b,        0x2e,        0x2f,
          837 /*30*/  0x2070,        0x2071,        0x2072,        0x2073,        0x2074,        0x2075,        0x2076,        0x2077,
          838         0x2078,        0x2079,        0x3a,        0x3b,        TAGS,        0x207c,        TAGE,        0x3f,
          839 /*40*/  0x40,        0x41,        0x42,        0x43,        0x44,        0x45,        0x46,        0x47,
          840         0x48,        0x49,        0x4a,        0x4b,'L',        0x4d,        0x4e,        0x4f,
          841 /*50*/        0x50,        0x51,        0x52,        0x53,        0x54,        0x55,        0x56,        0x57,
          842         0x58,        0x59,        0x5a,        0x5b,'\\',        0x5d,        0x5e,        0x5f,
          843 /*60*/        0x60,        0x61,        0x62,        0x63,        0x64,        0x65,        0x66,        0x67,
          844         0x68,        0x69,        0x6a,        0x6b,        0x6c,        0x6d,        0x6e,        0x6f,
          845 /*70*/        0x70,        0x71,        0x72,        0x73,        0x74,        0x75,        0x76,        0x77,
          846         0x78,        0x79,        0x7a,        0x7b,        0x7c,        0x7d,        0x7e,        NONE
          847 };
          848 #endif
          849 
          850 static int        tagstarts;        /* tested by pgwprintentry after each gettag() call; presumably nonzero for an opening tag -- confirm against gettag */
          851 static char        tag[Buflen];        /* tag name that pgwprintentry looks up in tagtab; presumably filled by gettag() */
          852 static char        spec[Buflen];        /* special-character name accumulated by getspec(), looked up in spectab */
          853 static Entry        curentry;        /* entry being printed; assigned by pgwprintentry */
          854 #define cursize (curentry.end-curentry.start)        /* byte length of curentry */
          855 
          856 static char        *getspec(char *, char *);
          857 static char        *gettag(char *, char *);
          858 
          859 /*
          860  * Print entry e, translating its bytes through normtab.  cmd is one of:
          861  *    'p': normal print
          862  *    'h': just print headwords
          863  *    'r': print raw (every byte is handed to outchar untranslated)
          864  */
          865 void
          866 pgwprintentry(Entry e, int cmd)
          867 {
          868         char *p, *pe;
          869         int t;
          870         long r, rprev, rlig;
          871         Rune *transtab;
          872 
          873         p = e.start;
          874         pe = e.end;
          875         transtab = normtab;
          876         rprev = NONE;        /* NONE means no rune is buffered */
          877         changett(0, 0, 0);
          878         curentry = e;        /* cursize reads this in the debug messages below */
          879         if(cmd == 'h')
          880                 outinhibit = 1;        /* toggled again by the Hw case below */
          881         while(p < pe) {
          882                 if(cmd == 'r') {
          883                         outchar(*p++);
          884                         continue;
          885                 }
          886                 r = transtab[(*p++)&0x7F];        /* index by the low 7 bits of the byte */
          887                 if(r < NONE) {
          888                         /* Emit the rune, but buffer in case of ligature */
          889                         if(rprev != NONE)
          890                                 outrune(rprev);
          891                         rprev = r;
          892                 } else if(r == SPCS) {
          893                         /* Start of special character name */
          894                         p = getspec(p, pe);
          895                         r = lookassoc(spectab, asize(spectab), spec);
          896                         if(r == -1) {
          897                                 if(debug)
          898                                         err("spec %ld %d %s",
          899                                                 e.doff, cursize, spec);
          900                                 r = 0xfffd;        /* unknown name: substitute U+FFFD */
          901                         }
          902                         if(r >= LIGS && r < LIGE) {
          903                                 /* handle possible ligature */
          904                                 rlig = liglookup(r, rprev);
          905                                 if(rlig != NONE)
          906                                         rprev = rlig;        /* overwrite rprev */
          907                                 else {
          908                                         /* could print accent, but let's not */
          909                                         if(rprev != NONE) outrune(rprev);
          910                                         rprev = NONE;
          911                                 }
          912                         } else if(r >= MULTI && r < MULTIE) {
          913                                 if(rprev != NONE) {
          914                                         outrune(rprev);
          915                                         rprev = NONE;
          916                                 }
          917                                 outrunes(multitab[r-MULTI]);        /* value indexes multitab after subtracting MULTI */
          918                         } else if(r == PAR) {
          919                                 if(rprev != NONE) {
          920                                         outrune(rprev);
          921                                         rprev = NONE;
          922                                 }
          923                                 outnl(1);
          924                         } else {
          925                                 if(rprev != NONE) outrune(rprev);
          926                                 rprev = r;        /* ordinary special: buffer it like a plain rune */
          927                         }
          928                 } else if(r == TAGS) {
          929                         /* Start of tag name */
          930                         if(rprev != NONE) {
          931                                 outrune(rprev);
          932                                 rprev = NONE;
          933                         }
          934                         p = gettag(p, pe);
          935                         t = lookassoc(tagtab, asize(tagtab), tag);
          936                         if(t == -1) {
          937                                 if(debug)
          938                                         err("tag %ld %d %s",
          939                                                 e.doff, cursize, tag);
          940                                 continue;        /* unknown tags produce no output */
          941                         }
          942                         switch(t){
          943                         case Hw:
          944                                 if(cmd == 'h') {
          945                                         if(!tagstarts)
          946                                                 outchar(' ');
          947                                         outinhibit = !tagstarts;
          948                                 }
          949                                 break;
          950                         case Sn:
          951                                 if(tagstarts) {
          952                                         outnl(2);
          953                                 }
          954                                 break;
          955                         case P:
          956                                 outnl(tagstarts);
          957                                 break;
          958                         case Col:
          959                         case Br:
          960                         case Blockquote:
          961                                 if(tagstarts)
          962                                         outnl(1);
          963                                 break;
          964                         case U:
          965                                 outchar('/');        /* written for every U tag; tagstarts is not consulted */
          966                         }
          967                 }
          968         }        /* NOTE(review): a rune still buffered in rprev here is never passed to outrune -- confirm this is intended */
          969         if(cmd == 'h') {
          970                 outinhibit = 0;
          971                 outnl(0);
          972         }
          973 }
          974 
          975 /*
          976  * Return offset into bdict where next webster entry after fromoff starts,
          977  * or -1 if the seek to fromoff fails.  Entries start with <p><hw> or <p>{.
          978  */
          979 long
          980 pgwnextoff(long fromoff)
          981 {
          982         long a, n;
          983         int c;
          984 
          985         a = Bseek(bdict, fromoff, 0);
          986         if(a != fromoff)
          987                 return -1;
          988         n = 0;        /* length of the matched start marker; 0 while none is found */
          989         for(;;) {
          990                 c = Bgetc(bdict);
          991                 if(c < 0)
          992                         break;        /* no more input: n is still 0, so the current offset is returned */
          993                 if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') {        /* NOTE(review): bytes consumed by a failed partial match are not rescanned, so e.g. "<<p><hw>" is not recognised -- confirm acceptable */
          994                         c = Bgetc(bdict);
          995                         if(c == '<') {
          996                                 if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w'
          997                                         && Bgetc(bdict) == '>')
          998                                                 n = 7;        /* strlen("<p><hw>") */
          999                         }else if (c == '{')
         1000                                 n = 4;        /* strlen("<p>{") */
         1001                         if(n)
         1002                                 break;
         1003                 }
         1004         }
         1005         return (Boffset(bdict)-n);        /* back up to the first byte of the marker */
         1006 }
         1007 
         1008 static char *prkey1 =        /* pronunciation key text, part 1 of 6; the parts are printed in order by pgwprintkey */
         1009 "KEY TO THE PRONUNCIATION\n"
         1010 "\n"
         1011 "I. CONSONANTS\n"
         1012 "b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
         1013 "\n"
         1014 "g as in go (gəʊ)\n"
         1015 "h  ...  ho! (həʊ)\n"
         1016 "r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
         1017 "(r)...  her (hɜː(r))\n"
         1018 "s  ...  see (siː), success (səkˈsɜs)\n"
         1019 "w  ...  wear (wɛə(r))\n"
         1020 "hw ...  when (hwɛn)\n"
         1021 "j  ...  yes (jɛs)\n"
         1022 "θ  ...  thin (θin), bath (bɑːθ)\n"
         1023 "ð  ...  then (ðɛn), bathe (beɪð)\n"
         1024 "ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
         1025 "tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
         1026 "ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
         1027 ;
         1028 static char *prkey2 =        /* part 2: remaining consonants and foreign consonants */
         1029 "dʒ ...  judge (dʒʌdʒ)\n"
         1030 "ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
         1031 "ŋg ...  finger (ˈfiŋgə(r))\n"
         1032 "\n"
         1033 "Foreign\n"
         1034 "ʎ as in It. seraglio (serˈraʎo)\n"
         1035 "ɲ  ...  Fr. cognac (kɔɲak)\n"
         1036 "x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
         1037 "ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
         1038 "ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
         1039 "c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
         1040 "ɥ  ...  Fr. cuisine (kɥizin)\n"
         1041 "\n"
         1042 ;
         1043 static char *prkey3 =        /* part 3: short vowels */
         1044 "II. VOWELS AND DIPTHONGS\n"
         1045 "\n"
         1046 "Short\n"
         1047 "ɪ as in pit (pɪt), -ness (-nɪs)\n"
         1048 "ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
         1049 "æ  ...  pat (pæt)\n"
         1050 "ʌ  ...  putt (pʌt)\n"
         1051 "ɒ  ...  pot (pɒt)\n"
         1052 "ʊ  ...  put (pʊt)\n"
         1053 "ə  ...  another (əˈnʌðə(r))\n"
         1054 "(ə)...  beaten (ˈbiːt(ə)n)\n"
         1055 "i  ...  Fr. si (si)\n"
         1056 "e  ...  Fr. bébé (bebe)\n"
         1057 "a  ...  Fr. mari (mari)\n"
         1058 "ɑ  ...  Fr. bâtiment (bɑtimã)\n"
         1059 "ɔ  ...  Fr. homme (ɔm)\n"
         1060 "o  ...  Fr. eau (o)\n"
         1061 "ø  ...  Fr. peu (pø)\n"
         1062 ;
         1063 static char *prkey4 =        /* part 4: remaining short vowels and long vowels */
         1064 "œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
         1065 "u  ...  Fr. douce (dus)\n"
         1066 "ʏ  ...  Ger. Müller (ˈmʏlər)\n"
         1067 "y  ...  Fr. du (dy)\n"
         1068 "\n"
         1069 "Long\n"
         1070 "iː as in bean (biːn)\n"
         1071 "ɑː ...  barn (bɑːn)\n"
         1072 "ɔː ...  born (bɔːn)\n"
         1073 "uː ...  boon (buːn)\n"
         1074 "ɜː ...  burn (bɜːn)\n"
         1075 "eː ...  Ger. Schnee (ʃneː)\n"
         1076 "ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
         1077 "aː ...  Ger. Tag (taːk)\n"
         1078 "oː ...  Ger. Sohn (zoːn)\n"
         1079 "øː ...  Ger. Goethe (gøːtə)\n"
         1080 "yː ...  Ger. grün (gryːn)\n"
         1081 "\n"
         1082 ;
         1083 static char *prkey5 =        /* part 5: nasal vowels and diphthongs */
         1084 "Nasal\n"
         1085 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
         1086 "ã  ...  Fr. franc (frã)\n"
         1087 "ɔ˜ ...  Fr. bon (bɔ˜n)\n"
         1088 "œ˜ ...  Fr. un (œ˜)\n"
         1089 "\n"
         1090 "Dipthongs, etc.\n"
         1091 "eɪ as in bay (beɪ)\n"
         1092 "aɪ ...  buy (baɪ)\n"
         1093 "ɔɪ ...  boy (bɔɪ)\n"
         1094 "əʊ ...  no (nəʊ)\n"
         1095 "aʊ ...  now (naʊ)\n"
         1096 "ɪə ...  peer (pɪə(r))\n"
         1097 "ɛə ...  pair (pɛə(r))\n"
         1098 "ʊə ...  tour (tʊə(r))\n"
         1099 "ɔə ...  boar (bɔə(r))\n"
         1100 "\n"
         1101 ;
         1102 static char *prkey6 =        /* part 6: stress marks */
         1103 "III. STRESS\n"
         1104 "\n"
         1105 "Main stress: ˈ preceding stressed syllable\n"
         1106 "Secondary stress: ˌ preceding stressed syllable\n"
         1107 "\n"
         1108 "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
         1110 
         1111 void
         1112 pgwprintkey(void)
         1113 {
         1114         Bprint(bout, "%s%s%s%s%s%s",
         1115                 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
         1116 }
         1117 
         1118 /*
         1119  * f points just after a '&', fe points at end of entry.
         1120  * Accumulate the special name, starting after the &
         1121  * and continuing until the next ';', in spec[].
         1122  * Return pointer to char after ';'.
         1123  */
         1124 static char *
         1125 getspec(char *f, char *fe)
         1126 {
         1127         char *t;
         1128         int c, i;
         1129 
         1130         t = spec;
         1131         i = sizeof spec;
         1132         while(--i > 0) {
         1133                 c = *f++;
         1134                 if(c == ';' || f == fe)
         1135                         break;
         1136                 *t++ = c;
         1137         }
         1138         *t = 0;
         1139         return f;
         1140 }
         1141 
         1142 /*
         1143  * f points just after '<'; fe points at end of entry.
         1144  * Expect next characters from bin to match:
         1145  *  [/][^ >]+( [^>=]+=[^ >]+)*>
         1146  *      tag   auxname auxval
         1147  * Accumulate the tag and its auxilliary information in
         1148  * tag[], auxname[][] and auxval[][].
         1149  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
         1150  * Set naux to the number of aux pairs found.
         1151  * Return pointer to after final '>'.
         1152  */
         1153 static char *
         1154 gettag(char *f, char *fe)
         1155 {
         1156         char *t;
         1157         int c, i;
         1158 
         1159         t = tag;
         1160         c = *f++;
         1161         if(c == '/')
         1162                 tagstarts = 0;
         1163         else {
         1164                 tagstarts = 1;
         1165                 *t++ = c;
         1166         }
         1167         i = Buflen;
         1168         while(--i > 0) {
         1169                 c = *f++;
         1170                 if(c == '>' || f == fe)
         1171                         break;
         1172                 *t++ = c;
         1173         }
         1174         *t = 0;
         1175         return f;
         1176 }