URI:
       tr.c - sbase - suckless unix tools
  HTML git clone git://git.suckless.org/sbase
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tr.c (6458B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stdlib.h>
            3 
            4 #include "utf.h"
            5 #include "util.h"
            6 
            /* Option flags; main() sets each to 1 when the matching option is parsed. */
            7 static int cflag = 0; /* -c or -C was given */
            8 static int dflag = 0; /* -d was given */
            9 static int sflag = 0; /* -s was given */
           10 
           /*
            * One entry of a parsed set: the runes start..end (both inclusive, see the
            * membership tests in main) with a repeat factor used by rangelen().
            */
           11 struct range {
           12         Rune   start; /* first rune covered by this entry */
           13         Rune   end;   /* last rune covered by this entry */
           14         size_t quant; /* multiplier applied to the span in rangelen() */
           15 };
           16 
           /*
            * Character classes accepted as [:name:] together with the predicate that
            * tests a rune for membership (the is*rune functions are presumably
            * declared in utf.h).
            *
            * The index of an entry doubles as its bit position in the set1checks and
            * set2checks masks: makeset() sets bit n for classes[n] and check() tests
            * bit i against classes[i].  ISLOWERBIT and ISUPPERBIT hard-code the
            * positions of "lower" (6) and "upper" (10), so the order must not change.
            */
           17 static struct {
           18         char    *name;
           19         int    (*check)(Rune);
           20 } classes[] = {
           21         { "alnum",  isalnumrune  },
           22         { "alpha",  isalpharune  },
           23         { "blank",  isblankrune  },
           24         { "cntrl",  iscntrlrune  },
           25         { "digit",  isdigitrune  },
           26         { "graph",  isgraphrune  },
           27         { "lower",  islowerrune  },
           28         { "print",  isprintrune  },
           29         { "punct",  ispunctrune  },
           30         { "space",  isspacerune  },
           31         { "upper",  isupperrune  },
           32         { "xdigit", isxdigitrune },
           33 };
           34 
           /*
            * Masks for the "lower" and "upper" entries of classes[] (indices 6 and
            * 10).  The replacement lists are parenthesized so the macros expand
            * safely next to operators that bind tighter than <<.
            */
           #define ISLOWERBIT                    (1U << 6)
           #define ISUPPERBIT                    (1U << 10)
           37 
           /*
            * Parsed form of the two operands, filled in by makeset() from main():
            * the range array, the number of entries stored in it, and the bitmask of
            * character classes (bit n selects classes[n]).  The set2 variables keep
            * their initial values when only one operand is given.
            */
           38 static struct   range *set1 = NULL;
           39 static size_t   set1ranges  = 0;
           40 static unsigned set1checks  = 0;
           41 static struct   range *set2 = NULL;
           42 static size_t   set2ranges  = 0;
           43 static unsigned set2checks  = 0;
           44 
           45 static int
           46 check(Rune rune, unsigned checks)
           47 {
           48         size_t i;
           49 
           50         for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
           51                 if (checks & 1 && classes[i].check(rune))
           52                         return 1;
           53 
           54         return 0;
           55 }
           56 
           57 static size_t
           58 rangelen(struct range r)
           59 {
           60         return (r.end - r.start + 1) * r.quant;
           61 }
           62 
           63 static size_t
           64 setlen(struct range *set, size_t setranges)
           65 {
           66         size_t len = 0, i;
           67 
           68         for (i = 0; i < setranges; i++)
           69                 len += rangelen(set[i]);
           70 
           71         return len;
           72 }
           73 
           74 static int
           75 rstrmatch(Rune *r, char *s, size_t n)
           76 {
           77         size_t i;
           78 
           79         for (i = 0; i < n; i++)
           80                 if (r[i] != s[i])
           81                         return 0;
           82         return 1;
           83 }
           84 
           85 static size_t
           86 makeset(char *str, struct range **set, unsigned *checks)
           87 {
           88         Rune  *rstr;
           89         size_t len, i, j, m, n;
           90         size_t q, setranges = 0;
           91         int    factor, base;
           92 
           93         /* rstr defines at most len ranges */
           94         unescape(str);
           95         rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
           96         len = utftorunestr(str, rstr);
           97         *set = ereallocarray(NULL, len, sizeof(**set));
           98 
           99         for (i = 0; i < len; i++) {
          100                 if (rstr[i] == '[') {
          101                         j = i;
          102 nextbrack:
          103                         if (j >= len)
          104                                 goto literal;
          105                         for (m = j; m < len; m++)
          106                                 if (rstr[m] == ']') {
          107                                         j = m;
          108                                         break;
          109                                 }
          110                         if (j == i)
          111                                 goto literal;
          112 
          113                         /* CLASSES [=EQUIV=] (skip) */
          114                         if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
          115                                 if (j - i != 4)
          116                                         goto literal;
          117                                 (*set)[setranges].start = rstr[i + 2];
          118                                 (*set)[setranges].end   = rstr[i + 2];
          119                                 (*set)[setranges].quant = 1;
          120                                 setranges++;
          121                                 i = j;
          122                                 continue;
          123                         }
          124 
          125                         /* CLASSES [:CLASS:] */
          126                         if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
          127                                 for (n = 0; n < LEN(classes); n++) {
          128                                         if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
          129                                                 *checks |= 1 << n;
          130                                                 i = j;
          131                                                 break;
          132                                         }
          133                                 }
          134                                 if (n < LEN(classes))
          135                                         continue;
          136                                 eprintf("Invalid character class.\n");
          137                         }
          138 
          139                         /* REPEAT  [_*n] (only allowed in set2) */
          140                         if (j - i > 2 && rstr[i + 2] == '*') {
          141                                 /* check if right side of '*' is a number */
          142                                 q = 0;
          143                                 factor = 1;
          144                                 base = (rstr[i + 3] == '0') ? 8 : 10;
          145                                 for (n = j - 1; n > i + 2; n--) {
          146                                         if (rstr[n] < '0' || rstr[n] > '9') {
          147                                                 n = 0;
          148                                                 break;
          149                                         }
          150                                         q += (rstr[n] - '0') * factor;
          151                                         factor *= base;
          152                                 }
          153                                 if (n == 0) {
          154                                         j = m + 1;
          155                                         goto nextbrack;
          156                                 }
          157                                 (*set)[setranges].start = rstr[i + 1];
          158                                 (*set)[setranges].end   = rstr[i + 1];
          159                                 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
          160                                 setranges++;
          161                                 i = j;
          162                                 continue;
          163                         }
          164 
          165                         j = m + 1;
          166                         goto nextbrack;
          167                 }
          168 literal:
          169                 /* RANGES [_-__-_], _-__-_ */
          170                 /* LITERALS _______ */
          171                 (*set)[setranges].start = rstr[i];
          172 
          173                 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
          174                         i += 2;
          175                 (*set)[setranges].end = rstr[i];
          176                 (*set)[setranges].quant = 1;
          177                 setranges++;
          178         }
          179 
          180         free(rstr);
          181         return setranges;
          182 }
          183 
          /*
           * Print the command synopsis through eprintf(), using argv0 as the program
           * name.  eprintf() presumably terminates the program - confirm in util.h.
           */
          184 static void
          185 usage(void)
          186 {
          187         eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
          188 }
          189 
          /*
           * Read runes from stdin, translate, delete and/or squeeze them according
           * to set1, set2 and the -c/-C/-d/-s flags, and write the result to stdout.
           * Returns the OR of the fshut() results for stdin and stdout.
           *
           * The body is a goto-driven loop: "read" fetches the next rune, the code
           * between "read" and "write" decides what happens to it, and "write"
           * applies the squeeze filter before emitting it.
           */
          190 int
          191 main(int argc, char *argv[])
          192 {
          193         Rune r, lastrune = 0;
          194         size_t off1, off2, i, m;
          195         int ret = 0;
          196 
          197         ARGBEGIN {
          198         case 'c':
          199         case 'C':
          200                 cflag = 1;
          201                 break;
          202         case 'd':
          203                 dflag = 1;
          204                 break;
          205         case 's':
          206                 sflag = 1;
          207                 break;
          208         default:
          209                 usage();
          210         } ARGEND
          211 
                      /*
                       * Accepted operand counts: neither -d nor -s needs two sets, -d
                       * alone needs one set, -s alone takes one or two.
                       * NOTE(review): -d together with -s is rejected for every operand
                       * count by these two conditions - confirm that is intended.
                       */
          212         if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
          213             (dflag && argc != 1))
          214                 usage();
          215 
          216         set1ranges = makeset(argv[0], &set1, &set1checks);
          217         if (argc == 2) {
          218                 set2ranges = makeset(argv[1], &set2, &set2checks);
          219                 /* sanity checks as we are translating */
          220                 if (!set2ranges && !set2checks)
          221                         eprintf("cannot map to an empty set.\n");
                              /* a class mask in set2 must be exactly "lower" or exactly "upper" */
          222                 if (set2checks && set2checks != ISLOWERBIT &&
          223                     set2checks != ISUPPERBIT) {
          224                         eprintf("can only map to 'lower' and 'upper' class.\n");
          225                 }
          226         }
          227 read:
                      /* a zero return from efgetrune() ends the loop (presumably end of input) */
          228         if (!efgetrune(&r, stdin, "<stdin>")) {
          229                 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
          230                 return ret;
          231         }
                      /* squeeze-only mode: nothing to translate or delete */
          232         if (argc == 1 && sflag)
          233                 goto write;
                      /* off1 tracks the offset of range i within set1, in rangelen() units */
          234         for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
          235                 if (set1[i].start <= r && r <= set1[i].end) {
                                      /* r is in set1: -d drops it, unless -c inverts the selection */
          236                         if (dflag) {
          237                                 if (cflag)
          238                                         goto write;
          239                                 else
          240                                         goto read;
          241                         }
                                      /* with -c, members of set1 pass through unchanged */
          242                         if (cflag)
          243                                 goto write;
          244 
          245                         /* map r to set2 */
          246                         if (set2checks) {
          247                                 if (set2checks == ISLOWERBIT)
          248                                         r = tolowerrune(r);
          249                                 else
          250                                         r = toupperrune(r);
          251                         } else {
                                              /* off1 becomes the position of r within the whole of set1 */
          252                                 off1 += r - set1[i].start;
                                              /* set2 is shorter than that position: use its last rune */
          253                                 if (off1 > setlen(set2, set2ranges) - 1) {
          254                                         r = set2[set2ranges - 1].end;
          255                                         goto write;
          256                                 }
                                              /*
                                               * Find the set2 range containing position off1; off2 is
                                               * the position at which that range starts.  Both loop
                                               * exits leave m one past the chosen range, hence the m--.
                                               */
          257                                 for (m = 0, off2 = 0; m < set2ranges; m++) {
          258                                         if (off2 + rangelen(set2[m]) > off1) {
          259                                                 m++;
          260                                                 break;
          261                                         }
          262                                         off2 += rangelen(set2[m]);
          263                                 }
          264                                 m--;
                                              /* quant consecutive positions map to the same rune */
          265                                 r = set2[m].start + (off1 - off2) / set2[m].quant;
          266                         }
          267                         goto write;
          268                 }
          269         }
                      /* r is in no explicit range of set1; try the character classes of set1 */
          270         if (check(r, set1checks)) {
          271                 if (cflag)
          272                         goto write;
          273                 if (dflag)
          274                         goto read;
          275                 if (set2checks) {
          276                         if (set2checks == ISLOWERBIT)
          277                                 r = tolowerrune(r);
          278                         else
          279                                 r = toupperrune(r);
          280                 } else {
                                      /* a class match has no position in set1: map to the last rune of set2 */
          281                         r = set2[set2ranges - 1].end;
          282                 }
          283                 goto write;
          284         }
                      /* r is not in set1 at all: with -c (and no -d) this is what gets translated */
          285         if (!dflag && cflag) {
          286                 if (set2checks) {
          287                         if (set2checks == ISLOWERBIT)
          288                                 r = tolowerrune(r);
          289                         else
          290                                 r = toupperrune(r);
          291                 } else {
          292                         r = set2[set2ranges - 1].end;
          293                 }
          294                 goto write;
          295         }
                      /* -c -d: runes outside set1 are deleted */
          296         if (dflag && cflag)
          297                 goto read;
                      /* otherwise r falls through and is written unchanged */
          298 write:
                      /*
                       * Squeeze with one set: drop r when it repeats the previously
                       * written rune and belongs to set1.
                       * NOTE(review): cflag is not consulted here, so -c -s squeezes
                       * members of set1 rather than of its complement; and because
                       * lastrune starts at 0, a leading NUL rune that is in set1 is
                       * dropped - confirm both are intended.
                       */
          299         if (argc == 1 && sflag && r == lastrune) {
          300                 if (check(r, set1checks))
          301                         goto read;
          302                 for (i = 0; i < set1ranges; i++) {
          303                         if (set1[i].start <= r && r <= set1[i].end)
          304                                 goto read;
          305                 }
          306         }
                      /* squeeze with two sets: same test, but against set2 (the translated rune) */
          307         if (argc == 2 && sflag && r == lastrune) {
          308                 if (set2checks && check(r, set2checks))
          309                         goto read;
          310                 for (i = 0; i < set2ranges; i++) {
          311                         if (set2[i].start <= r && r <= set2[i].end)
          312                                 goto read;
          313                 }
          314         }
          315         efputrune(&r, stdout, "<stdout>");
          316         lastrune = r;
          317         goto read;
          318 }