URI:
       tmanglegcc2.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tmanglegcc2.c (11626B)
       ---
            1 /*
            2  * gcc2 name demangler.
            3  *
            4  * gcc2 follows the C++ Annotated Reference Manual section 7.2.1
            5  * name mangling description with a few changes.
            6  * See gpcompare.texi, gxxint_15.html in this directory for the changes.
            7  *
            8  * Not implemented:
            9  *        unicode mangling
           10  *        renaming of operator functions
           11  */
           12 /*
           13 RULES TO ADD:
           14 
           15 _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
           16 
           17 
           18 */
           19 #include <u.h>
           20 #include <libc.h>
           21 #include <bio.h>
           22 #include <mach.h>
           23 
           24 #define debug 0
           25 
           /*
            * One entry of a character-keyed lookup table:
            * c is the key character, s the string it stands for.
            * Tables built from these end with an entry whose c is 0.
            */
           typedef struct Chartab
           {
                   char c;         /* key character; 0 marks the end of a table */
                   char *s;        /* text associated with c */
           } Chartab;
           32 
           33 static char*
           34 chartabsearch(Chartab *ct, int c)
           35 {
           36         for(; ct->c; ct++)
           37                 if(ct->c == c)
           38                         return ct->s;
           39         return nil;
           40 }
           41 
           42 static Chartab typetab[] =
           43 {
           44         'b',        "bool",
           45         'c',        "char",
           46         'd',        "double",
           47         'e',        "...",
           48         'f',        "float",
           49         'i',        "int",
           50         'J',        "complex",
           51         'l',        "long",
           52         'r',        "long double",
           53         's',        "short",
           54         'v',        "void",
           55         'w',        "wchar_t",
           56         'x',        "long long",
           57         0, 0
           58 };
           59 
           60 static Chartab modifiertab[] =
           61 {
           62         'C',        "const",
           63         'S',        "signed",                /* means static for member functions */
           64         'U',        "unsigned",
           65         'V',        "volatile",
           66 
           67         'G',        "garbage",        /* no idea what this is */
           68         0, 0
           69 };
           70 
           /*
            * Stand-in names for functions whose mangled form carries no name.
            * demanglegcc2 compares against these arrays by address, so each
            * must remain a distinct object.
            */
           static char constructor[] = "constructor";
           static char destructor[] = "destructor";
           static char gconstructor[] = "$gconstructor";        /* global constructor */
           static char gdestructor[] = "$gdestructor";        /* global destructor */

           /* characters accepted as the first character of the mangled part */
           static char manglestarts[] = "123456789CFHQSUVt";
           77 
           /* forward declarations of the parsing helpers defined below */
           static int gccname(char **ps, char **pp);
           static char *demanglegcc2a(char *s, char *buf);
           static char *demanglegcc2b(char *s, char *buf);
           static char *demanglegcc2c(char *s, char *buf);
           static int gccnumber(char **ps, int *pn, int many);
           83 
           84 char*
           85 demanglegcc2(char *s, char *buf)
           86 {
           87         char *name, *os, *p, *t;
           88         int isfn, namelen;
           89 
           90 
           91         /*
           92          * Pick off some cases that seem not to fit the pattern.
           93          */
           94         if((t = demanglegcc2a(s, buf)) != nil)
           95                 return t;
           96         if((t = demanglegcc2b(s, buf)) != nil)
           97                 return t;
           98         if((t = demanglegcc2c(s, buf)) != nil)
           99                 return t;
          100 
          101         /*
          102          * First, figure out whether this is a mangled name.
          103          * The name begins with a short version of the name, then __.
          104          * Of course, some C names begin with __ too, so the ultimate
          105          * test is whether what follows __ looks reasonable.
          106          * We use a test on the first letter instead.
          107          *
          108          * Constructors have no name - they begin __ (double underscore).
          109          * Destructors break the rule - they begin _._ (underscore, dot, underscore).
          110          */
          111         os = s;
          112         isfn = 0;
          113         if(memcmp(s, "_._", 3) == 0){
          114                 isfn = 1;
          115                 name = destructor;
          116                 namelen = strlen(name);
          117                 s += 3;
          118         }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){
          119                 isfn = 1;
          120                 name = gdestructor;
          121                 namelen = strlen(name);
          122                 s += 13;
          123         }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){
          124                 isfn = 0;
          125                 name = gdestructor;
          126                 namelen = strlen(name);
          127                 s += 12;
          128         }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){
          129                 isfn = 1;
          130                 name = gconstructor;
          131                 namelen = strlen(name);
          132                 s += 13;
          133         }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){
          134                 isfn = 0;
          135                 name = gconstructor;
          136                 namelen = strlen(name);
          137                 s += 12;
          138         }else{
          139                 t = strstr(os, "__");
          140                 if(t == nil)
          141                         return os;
          142                 do{
          143                         s = t;
          144                         if(strchr(manglestarts, *(s+2)))
          145                                 break;
          146                 }while((t = strstr(s+1, "__")) != nil);
          147 
          148                 name = os;
          149                 namelen = s - os;
          150                 if(namelen == 0){
          151                         isfn = 1;
          152                         name = constructor;
          153                         namelen = strlen(name);
          154                 }
          155                 s += 2;
          156         }
          157 
          158         /*
          159          * Now s points at the mangled crap (maybe).
          160          * and name is the final element of the name.
          161          */
          162         if(strchr(manglestarts, *s) == nil)
          163                 return os;
          164 
          165         p = buf;
          166         if(*s == 'F'){
          167                 /* global function, no extra name pieces, just types */
          168                 isfn = 1;
          169         }else{
          170                 /* parse extra name pieces */
          171                 if(!gccname(&s, &p)){
          172                         if(debug)
          173                                 fprint(2, "parsename %s: %r\n", s);
          174                         return os;
          175                 }
          176 
          177                 /* if we have a constructor or destructor, try to use the C++ name */
          178                 t = nil;
          179                 if(name == constructor || name == destructor){
          180                         *p = 0;
          181                         t = strrchr(buf, ':');
          182                         if(t)
          183                                 t++;
          184                         else
          185                                 t = buf;
          186                 }
          187                 strcpy(p, "::");
          188                 p += 2;
          189                 if(t){
          190                         namelen = strlen(t)-2;
          191                         if(name == destructor)
          192                                 *p++ = '~';
          193                         name = t;
          194                 }
          195         }
          196         if(p >= buf+2 && memcmp(p-2, "::", 2) == 0 && *(p-3) == ')')
          197                 p -= 2;
          198         memmove(p, name, namelen);
          199         p += namelen;
          200 
          201         if(*s == 'F'){
          202                 /* might be from above, or might follow name pieces */
          203                 s++;
          204                 isfn = 1;
          205         }
          206 
          207         /* the rest of the name is argument types - could skip this */
          208         if(*s || isfn){
          209                 *p++ = '(';
          210                 while(*s != 0 && *s != '_'){
          211                         if(!gccname(&s, &p))
          212                                 break;
          213                         *p++ = ',';
          214                 }
          215                 if(*(p-1) == ',')
          216                         p--;
          217                 *p++ = ')';
          218         }
          219 
          220         if(*s == '_'){
          221                 /* return type (left over from H) */
          222         }
          223 
          224         *p = 0;
          225         return buf;
          226 }
          227 
          228 /*
          229  * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
          230  * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos
          231  * (maybe the funny syntax means they are private)
          232  */
          233 static char*
          234 demanglegcc2a(char *s, char *buf)
          235 {
          236         char *p;
          237 
          238         if(*s != '_' || strchr(manglestarts, *(s+1)) == nil)
          239                 return nil;
          240         p = buf;
          241         s++;
          242         if(!gccname(&s, &p))
          243                 return nil;
          244         if(*s != '.')
          245                 return nil;
          246         s++;
          247         strcpy(p, "::");
          248         p += 2;
          249         strcpy(p, s);
          250         return buf;
          251 }
          252 
          253 /*
          254  * _tfb => type info for bool
          255  * __vt_7ostream => vtbl for ostream
          256  */
          257 static char*
          258 demanglegcc2b(char *s, char *buf)
          259 {
          260         char *p;
          261         char *t;
          262 
          263         if(memcmp(s, "__ti", 4) == 0){
          264                 t = "$typeinfo";
          265                 s += 4;
          266         }else if(memcmp(s, "__tf", 4) == 0){
          267                 t = "$typeinfofn";
          268                 s += 4;
          269         }else if(memcmp(s, "__vt_", 5) == 0){
          270                 t = "$vtbl";
          271                 s += 5;
          272         }else
          273                 return nil;
          274 
          275         p = buf;
          276         for(;;){
          277                 if(*s == 0 || !gccname(&s, &p))
          278                         return nil;
          279                 if(*s == 0)
          280                         break;
          281                 if(*s != '.' && *s != '$')
          282                         return nil;
          283                 strcpy(p, "::");
          284                 p += 2;
          285                 s++;
          286         }
          287         strcpy(p, "::");
          288         p += 2;
          289         strcpy(p, t);
          290         return buf;
          291 }
          292 
          293 /*
          294  * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream
          295  */
          296 static char*
          297 demanglegcc2c(char *s, char *buf)
          298 {
          299         int n;
          300         char *p;
          301 
          302         if(memcmp(s, "__thunk_", 8) != 0)
          303                 return nil;
          304         s += 8;
          305         if(!gccnumber(&s, &n, 1))
          306                 return nil;
          307         if(memcmp(s, "__._", 4) != 0)        /* might as well be morse code */
          308                 return nil;
          309         s += 4;
          310         p = buf;
          311         if(!gccname(&s, &p))
          312                 return nil;
          313         strcpy(p, "::$thunk");
          314         return buf;
          315 }
          316 
          317 /*
          318  * Parse a number, a non-empty run of digits.
          319  * If many==0, then only one digit is used, even
          320  * if it is followed by more.  When we need a big
          321  * number in a one-digit slot, it gets bracketed by underscores.
          322  */
          323 static int
          324 gccnumber(char **ps, int *pn, int many)
          325 {
          326         char *s;
          327         int n, eatunderscore;
          328 
          329         s = *ps;
          330         eatunderscore = 0;
          331         if(!many && *s == '_'){
          332                 many = 1;
          333                 s++;
          334                 eatunderscore = 1;
          335         }
          336         if(!isdigit((uchar)*s)){
          337         bad:
          338                 werrstr("bad number %.20s", *ps);
          339                 return 0;
          340         }
          341         if(many)
          342                 n = strtol(s, &s, 10);
          343         else
          344                 n = *s++ - '0';
          345         if(eatunderscore){
          346                 if(*s != '_')
          347                         goto bad;
          348                 s++;
          349         }
          350         *ps = s;
          351         *pn = n;
          352         return 1;
          353 }
          354 
          355 /*
          356  * Pick apart the next mangled name section.
          357  * Names and types are treated as the same.
          358  * Let's see how far we can go before that becomes a problem.
          359  */
          360 static int
          361 gccname(char **ps, char **pp)
          362 {
          363         int i, n, m, val;
          364         char *os, *s, *t, *p, *p0, *p1;
          365 
          366         s = *ps;
          367         os = s;
          368         p = *pp;
          369 
          370 /*        print("\tgccname: %s\n", s); */
          371 
          372         /* basic types */
          373         if((t = chartabsearch(typetab, *s)) != nil){
          374                 s++;
          375                 strcpy(p, t);
          376                 p += strlen(t);
          377                 goto out;
          378         }
          379 
          380         /* modifiers */
          381         if((t = chartabsearch(modifiertab, *s)) != nil){
          382                 s++;
          383                 if(!gccname(&s, &p))
          384                         return 0;
          385                 /*
          386                  * These don't end up in the right place
          387                  * and i don't care anyway
          388                  * (AssertHeld__C17ReaderWriterMutex)
          389                  */
          390                 /*
          391                 *p++ = ' ';
          392                 strcpy(p, t);
          393                 p += strlen(p);
          394                 */
          395                 goto out;
          396         }
          397 
          398         switch(*s){
          399         default:
          400         bad:
          401                 if(debug)
          402                         fprint(2, "gccname: %s (%s)\n", os, s);
          403                 werrstr("bad name %.20s", s);
          404                 return 0;
          405 
          406         case '1': case '2': case '3': case '4':        /* length-prefixed string */
          407         case '5': case '6': case '7': case '8': case '9':
          408                 if(!gccnumber(&s, &n, 1))
          409                         return 0;
          410                 memmove(p, s, n);
          411                 p += n;
          412                 s += n;
          413                 break;
          414 
          415         case 'A':        /* array */
          416                 t = s;
          417                 s++;
          418                 if(!gccnumber(&s, &n, 1))
          419                         return 0;
          420                 if(*s != '_'){
          421                         werrstr("bad array %.20s", t);
          422                         return 0;
          423                 }
          424                 s++;
          425                 sprint(p, "array[%d] ", n);
          426                 p += strlen(p);
          427                 break;
          428 
          429         case 'F':        /* function */
          430                 t = s;
          431                 s++;
          432                 strcpy(p, "fn(");
          433                 p += 3;
          434                 /* arguments */
          435                 while(*s && *s != '_')
          436                         if(!gccname(&s, &p))
          437                                 return 0;
          438                 if(*s != '_'){
          439                         werrstr("unexpected end in function: %s", t);
          440                         return 0;
          441                 }
          442                 s++;
          443                 strcpy(p, " => ");
          444                 p += 4;
          445                 /* return type */
          446                 if(!gccname(&s, &p))
          447                         return 0;
          448                 *p++ = ')';
          449                 break;
          450 
          451         case 'H':        /* template specialization */
          452                 if(memcmp(s-2, "__", 2) != 0)
          453                         fprint(2, "wow: %s\n", s-2);
          454                 t = s;
          455                 s++;
          456                 if(!gccnumber(&s, &n, 0))
          457                         return 0;
          458                 p0 = p;
          459                 /* template arguments */
          460                 *p++ = '<';
          461                 for(i=0; i<n; i++){
          462                         val = 1;
          463                         if(*s == 'Z'){        /* argument is a type, not value */
          464                                 val = 0;
          465                                 s++;
          466                         }
          467                         if(!gccname(&s, &p))
          468                                 return 0;
          469                         if(val){
          470                                 if(!gccnumber(&s, &m, 1))        /* gccnumber: 1 or 0? */
          471                                         return 0;
          472                                 sprint(p, "=%d", m);
          473                                 p += strlen(p);
          474                         }
          475                         if(i+1<n)
          476                                 *p++ = ',';
          477                 }
          478                 *p++ = '>';
          479                 if(*s != '_'){
          480                         werrstr("bad template %s", t);
          481                         return 0;
          482                 }
          483                 s++;
          484 
          485                 /*
          486                  * Can't seem to tell difference between a qualifying name
          487                  * and arguments.  Not sure which is which.  It appears that if
          488                  * you get a name, use it, otherwise look for types.
          489                  * The G type qualifier appears to have no effect other than
          490                  * turning an ambiguous name into a definite type.
          491                  *
          492                  *        SetFlag__H1Zb_P15FlagSettingMode_v
          493                  *        =>        void SetFlag<bool>(FlagSettingMode *)
          494                  *        SetFlag__H1Zb_15FlagSettingMode_v
          495                  *        =>        void FlagSettingMode::SetFlag<bool>()
          496                  *        SetFlag__H1Zb_G15FlagSettingMode_v
          497                  *        =>        void SetFlag<bool>(FlagSettingMode)
          498                  */
          499                 if(strchr("ACFGPRSUVX", *s)){
          500                         /* args */
          501                         t = s;
          502                         p1 = p;
          503                         *p++ = '(';
          504                         while(*s != '_'){
          505                                 if(*s == 0 || !gccname(&s, &p)){
          506                                         werrstr("bad H args: %s", t);
          507                                         return 0;
          508                                 }
          509                         }
          510                         *p++ = ')';
          511                         s++;
          512                 }else{
          513                         p1 = p;
          514                         /* name */
          515                         if(!gccname(&s, &p))
          516                                 return 0;
          517                 }
          518                 /*
          519                  * Need to do some rearrangement of <> () and names here.
          520                  * Doesn't matter since we strip out the <> and () anyway.
          521                  */
          522                 break;
          523 
          524         case 'M':        /* M1S: pointer to member */
          525                 if(*(s+1) != '1' || *(s+2) != 'S')
          526                         goto bad;
          527                 s += 3;
          528                 strcpy(p, "mptr ");
          529                 p += 5;
          530                 if(!gccname(&s, &p))
          531                         return 0;
          532                 break;
          533 
          534         case 'N':        /* multiply-repeated type */
          535                 s++;
          536                 if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0))
          537                         return 0;
          538                 sprint(p, "T%dx%d", m, n);
          539                 p += strlen(p);
          540                 break;
          541 
          542         case 'P':        /* pointer */
          543                 s++;
          544                 strcpy(p, "ptr ");
          545                 p += 4;
          546                 if(!gccname(&s, &p))
          547                         return 0;
          548                 break;
          549 
          550         case 'Q':        /* qualified name */
          551                 s++;
          552                 if(!gccnumber(&s, &n, 0))
          553                         return 0;
          554                 for(i=0; i<n; i++){
          555                         if(!gccname(&s, &p)){
          556                                 werrstr("in hierarchy: %r");
          557                                 return 0;
          558                         }
          559                         if(i+1 < n){
          560                                 strcpy(p, "::");
          561                                 p += 2;
          562                         }
          563                 }
          564                 break;
          565 
          566         case 'R':        /* reference */
          567                 s++;
          568                 strcpy(p, "ref ");
          569                 p += 4;
          570                 if(!gccname(&s, &p))
          571                         return 0;
          572                 break;
          573 
          574         case 't':        /* class template instantiation */
          575                 /* should share code with case 'H' */
          576                 t = s;
          577                 s++;
          578                 if(!gccname(&s, &p))
          579                         return 0;
          580                 if(!gccnumber(&s, &n, 0))
          581                         return 0;
          582                 p0 = p;
          583                 /* template arguments */
          584                 *p++ = '<';
          585                 for(i=0; i<n; i++){
          586                         val = 1;
          587                         if(*s == 'Z'){        /* argument is a type, not value */
          588                                 val = 0;
          589                                 s++;
          590                         }
          591                         if(!gccname(&s, &p))
          592                                 return 0;
          593                         if(val){
          594                                 if(!gccnumber(&s, &m, 1))        /* gccnumber: 1 or 0? */
          595                                         return 0;
          596                                 sprint(p, "=%d", m);
          597                                 p += strlen(p);
          598                         }
          599                         if(i+1<n)
          600                                 *p++ = ',';
          601                 }
          602                 *p++ = '>';
          603                 break;
          604 
          605         case 'T':        /* once-repeated type */
          606                 s++;
          607                 if(!gccnumber(&s, &n, 0))
          608                         return 0;
          609                 sprint(p, "T%d", n);
          610                 p += strlen(p);
          611                 break;
          612 
          613         case 'X':        /* type parameter in 'H' */
          614                 if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2)))
          615                         goto bad;
          616                 memmove(p, s, 3);
          617                 p += 3;
          618                 s += 3;
          619                 break;
          620         }
          621 
          622         USED(p1);
          623         USED(p0);
          624 
          625 out:
          626         *ps = s;
          627         *pp = p;
          628         return 1;
          629 }