tr.c - sbase - suckless unix tools
HTML git clone git://git.suckless.org/sbase
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
tr.c (6458B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <stdlib.h>
3
4 #include "utf.h"
5 #include "util.h"
6
/* Option switches; main() sets each to 1 when its flag is given. */
static int cflag; /* -c / -C: operate on runes NOT matched by set1 */
static int dflag; /* -d: drop selected runes instead of writing them */
static int sflag; /* -s: collapse runs of an identical selected rune */
10
11 struct range {
12 Rune start;
13 Rune end;
14 size_t quant;
15 };
16
17 static struct {
18 char *name;
19 int (*check)(Rune);
20 } classes[] = {
21 { "alnum", isalnumrune },
22 { "alpha", isalpharune },
23 { "blank", isblankrune },
24 { "cntrl", iscntrlrune },
25 { "digit", isdigitrune },
26 { "graph", isgraphrune },
27 { "lower", islowerrune },
28 { "print", isprintrune },
29 { "punct", ispunctrune },
30 { "space", isspacerune },
31 { "upper", isupperrune },
32 { "xdigit", isxdigitrune },
33 };
34
/*
 * Masks for the "lower" and "upper" entries of classes[] (indices 6 and
 * 10).  The expansions are parenthesized so the macros keep their value
 * when combined with operators that bind tighter than `<<`, e.g. `+`.
 */
#define ISLOWERBIT (1U << 6)
#define ISUPPERBIT (1U << 10)
37
/* Parsed set1 operand: range array, its entry count, and class bitmask. */
static struct range *set1       = NULL;
static size_t        set1ranges = 0;
static unsigned      set1checks = 0;

/* Parsed set2 operand, same layout; only filled when set2 is given. */
static struct range *set2       = NULL;
static size_t        set2ranges = 0;
static unsigned      set2checks = 0;
44
45 static int
46 check(Rune rune, unsigned checks)
47 {
48 size_t i;
49
50 for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
51 if (checks & 1 && classes[i].check(rune))
52 return 1;
53
54 return 0;
55 }
56
57 static size_t
58 rangelen(struct range r)
59 {
60 return (r.end - r.start + 1) * r.quant;
61 }
62
63 static size_t
64 setlen(struct range *set, size_t setranges)
65 {
66 size_t len = 0, i;
67
68 for (i = 0; i < setranges; i++)
69 len += rangelen(set[i]);
70
71 return len;
72 }
73
74 static int
75 rstrmatch(Rune *r, char *s, size_t n)
76 {
77 size_t i;
78
79 for (i = 0; i < n; i++)
80 if (r[i] != s[i])
81 return 0;
82 return 1;
83 }
84
85 static size_t
86 makeset(char *str, struct range **set, unsigned *checks)
87 {
88 Rune *rstr;
89 size_t len, i, j, m, n;
90 size_t q, setranges = 0;
91 int factor, base;
92
93 /* rstr defines at most len ranges */
94 unescape(str);
95 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
96 len = utftorunestr(str, rstr);
97 *set = ereallocarray(NULL, len, sizeof(**set));
98
99 for (i = 0; i < len; i++) {
100 if (rstr[i] == '[') {
101 j = i;
102 nextbrack:
103 if (j >= len)
104 goto literal;
105 for (m = j; m < len; m++)
106 if (rstr[m] == ']') {
107 j = m;
108 break;
109 }
110 if (j == i)
111 goto literal;
112
113 /* CLASSES [=EQUIV=] (skip) */
114 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
115 if (j - i != 4)
116 goto literal;
117 (*set)[setranges].start = rstr[i + 2];
118 (*set)[setranges].end = rstr[i + 2];
119 (*set)[setranges].quant = 1;
120 setranges++;
121 i = j;
122 continue;
123 }
124
125 /* CLASSES [:CLASS:] */
126 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
127 for (n = 0; n < LEN(classes); n++) {
128 if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
129 *checks |= 1 << n;
130 i = j;
131 break;
132 }
133 }
134 if (n < LEN(classes))
135 continue;
136 eprintf("Invalid character class.\n");
137 }
138
139 /* REPEAT [_*n] (only allowed in set2) */
140 if (j - i > 2 && rstr[i + 2] == '*') {
141 /* check if right side of '*' is a number */
142 q = 0;
143 factor = 1;
144 base = (rstr[i + 3] == '0') ? 8 : 10;
145 for (n = j - 1; n > i + 2; n--) {
146 if (rstr[n] < '0' || rstr[n] > '9') {
147 n = 0;
148 break;
149 }
150 q += (rstr[n] - '0') * factor;
151 factor *= base;
152 }
153 if (n == 0) {
154 j = m + 1;
155 goto nextbrack;
156 }
157 (*set)[setranges].start = rstr[i + 1];
158 (*set)[setranges].end = rstr[i + 1];
159 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
160 setranges++;
161 i = j;
162 continue;
163 }
164
165 j = m + 1;
166 goto nextbrack;
167 }
168 literal:
169 /* RANGES [_-__-_], _-__-_ */
170 /* LITERALS _______ */
171 (*set)[setranges].start = rstr[i];
172
173 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
174 i += 2;
175 (*set)[setranges].end = rstr[i];
176 (*set)[setranges].quant = 1;
177 setranges++;
178 }
179
180 free(rstr);
181 return setranges;
182 }
183
184 static void
185 usage(void)
186 {
187 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
188 }
189
190 int
191 main(int argc, char *argv[])
192 {
193 Rune r, lastrune = 0;
194 size_t off1, off2, i, m;
195 int ret = 0;
196
197 ARGBEGIN {
198 case 'c':
199 case 'C':
200 cflag = 1;
201 break;
202 case 'd':
203 dflag = 1;
204 break;
205 case 's':
206 sflag = 1;
207 break;
208 default:
209 usage();
210 } ARGEND
211
212 if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
213 (dflag && argc != 1))
214 usage();
215
216 set1ranges = makeset(argv[0], &set1, &set1checks);
217 if (argc == 2) {
218 set2ranges = makeset(argv[1], &set2, &set2checks);
219 /* sanity checks as we are translating */
220 if (!set2ranges && !set2checks)
221 eprintf("cannot map to an empty set.\n");
222 if (set2checks && set2checks != ISLOWERBIT &&
223 set2checks != ISUPPERBIT) {
224 eprintf("can only map to 'lower' and 'upper' class.\n");
225 }
226 }
227 read:
228 if (!efgetrune(&r, stdin, "<stdin>")) {
229 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
230 return ret;
231 }
232 if (argc == 1 && sflag)
233 goto write;
234 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
235 if (set1[i].start <= r && r <= set1[i].end) {
236 if (dflag) {
237 if (cflag)
238 goto write;
239 else
240 goto read;
241 }
242 if (cflag)
243 goto write;
244
245 /* map r to set2 */
246 if (set2checks) {
247 if (set2checks == ISLOWERBIT)
248 r = tolowerrune(r);
249 else
250 r = toupperrune(r);
251 } else {
252 off1 += r - set1[i].start;
253 if (off1 > setlen(set2, set2ranges) - 1) {
254 r = set2[set2ranges - 1].end;
255 goto write;
256 }
257 for (m = 0, off2 = 0; m < set2ranges; m++) {
258 if (off2 + rangelen(set2[m]) > off1) {
259 m++;
260 break;
261 }
262 off2 += rangelen(set2[m]);
263 }
264 m--;
265 r = set2[m].start + (off1 - off2) / set2[m].quant;
266 }
267 goto write;
268 }
269 }
270 if (check(r, set1checks)) {
271 if (cflag)
272 goto write;
273 if (dflag)
274 goto read;
275 if (set2checks) {
276 if (set2checks == ISLOWERBIT)
277 r = tolowerrune(r);
278 else
279 r = toupperrune(r);
280 } else {
281 r = set2[set2ranges - 1].end;
282 }
283 goto write;
284 }
285 if (!dflag && cflag) {
286 if (set2checks) {
287 if (set2checks == ISLOWERBIT)
288 r = tolowerrune(r);
289 else
290 r = toupperrune(r);
291 } else {
292 r = set2[set2ranges - 1].end;
293 }
294 goto write;
295 }
296 if (dflag && cflag)
297 goto read;
298 write:
299 if (argc == 1 && sflag && r == lastrune) {
300 if (check(r, set1checks))
301 goto read;
302 for (i = 0; i < set1ranges; i++) {
303 if (set1[i].start <= r && r <= set1[i].end)
304 goto read;
305 }
306 }
307 if (argc == 2 && sflag && r == lastrune) {
308 if (set2checks && check(r, set2checks))
309 goto read;
310 for (i = 0; i < set2ranges; i++) {
311 if (set2[i].start <= r && r <= set2[i].end)
312 goto read;
313 }
314 }
315 efputrune(&r, stdout, "<stdout>");
316 lastrune = r;
317 goto read;
318 }