Commit | Line | Data |
---|---|---|
01f1c879 SR |
1 | /* |
2 | * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>. All rights reserved. | |
3 | * | |
4 | * This code is derived from software contributed to Berkeley by Dave Yost. | |
5 | * It was rewritten to support ANSI C by Tony Finch. The original version of | |
6 | * unifdef carried the following copyright notice. None of its code remains | |
7 | * in this version (though some of the names remain). | |
8 | * | |
9 | * Copyright (c) 1985, 1993 | |
10 | * The Regents of the University of California. All rights reserved. | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #include <sys/cdefs.h> | |
35 | ||
36 | #ifndef lint | |
37 | #if 0 | |
38 | static const char copyright[] = | |
39 | "@(#) Copyright (c) 1985, 1993\n\ | |
40 | The Regents of the University of California. All rights reserved.\n"; | |
41 | #endif | |
42 | #ifdef __IDSTRING | |
43 | __IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"); | |
44 | __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $"); | |
45 | __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $"); | |
46 | #endif | |
47 | #endif /* not lint */ | |
48 | #ifdef __FBSDID | |
49 | __FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $"); | |
50 | #endif | |
51 | ||
52 | /* | |
53 | * unifdef - remove ifdef'ed lines | |
54 | * | |
55 | * Wishlist: | |
56 | * provide an option which will append the name of the | |
57 | * appropriate symbol after #else's and #endif's | |
58 | * provide an option which will check symbols after | |
59 | * #else's and #endif's to see that they match their | |
60 | * corresponding #ifdef or #ifndef | |
61 | * | |
62 | * The first two items above require better buffer handling, which would | |
63 | * also make it possible to handle all "dodgy" directives correctly. | |
64 | */ | |
65 | ||
66 | #include <ctype.h> | |
67 | #include <err.h> | |
68 | #include <stdarg.h> | |
69 | #include <stdbool.h> | |
70 | #include <stdio.h> | |
71 | #include <stdlib.h> | |
72 | #include <string.h> | |
73 | #include <unistd.h> | |
74 | ||
75 | size_t strlcpy(char *dst, const char *src, size_t siz); | |
76 | ||
/*
 * Classification of one input line.
 *
 * The ten directive kinds come first, in a fixed order, so that adding
 * LT_DODGY to any of them gives the matching "dodgy" (split across
 * lines) variant.  The values index linetype_name[] and the second
 * dimension of the transition table, so the order must not change.
 */
typedef enum {
	LT_TRUEI,		/* #if known true, symbol has the ignore flag */
	LT_FALSEI,		/* #if known false, symbol has the ignore flag */
	LT_IF,			/* #if whose value is unknown */
	LT_TRUE,		/* #if known true */
	LT_FALSE,		/* #if known false */
	LT_ELIF,		/* #elif whose value is unknown */
	LT_ELTRUE,		/* #elif known true */
	LT_ELFALSE,		/* #elif known false */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* offset: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* not a conditional directive */
	LT_EOF,			/* no more input */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
105 | ||
/*
 * State of the #if group at the current nesting depth.  The values
 * index ifstate_name[] and the first dimension of the transition table.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
127 | ||
/*
 * State of the comment/literal scanner.  NO_COMMENT is deliberately
 * zero so the state can be tested as a boolean ("are we inside
 * something?").
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING",
	[CHAR_LITERAL]		= "CHAR",
	[STRING_LITERAL]	= "STRING"
};
142 | ||
/*
 * How far the current logical line has got towards being a
 * preprocessor directive.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
153 | ||
/*
 * Fixed capacities, taken from the minimum translation limits in
 * ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64		/* maximum #if nesting */
#define	MAXLINE		4096		/* maximum length of line */
#define	MAXSYMS		4096		/* maximum number of symbols */

/*
 * Editing a directive keyword can make the line longer than it was, so
 * the line buffer is declared this many bytes larger than MAXLINE to
 * leave room for the replacement text.
 */
#define	EDITSLOP	10
166 | ||
167 | /* | |
168 | * Globals. | |
169 | */ | |
170 | ||
171 | static bool complement; /* -c: do the complement */ | |
172 | static bool debugging; /* -d: debugging reports */ | |
173 | static bool iocccok; /* -e: fewer IOCCC errors */ | |
174 | static bool killconsts; /* -k: eval constant #ifs */ | |
175 | static bool lnblank; /* -l: blank deleted lines */ | |
176 | static bool lnnum; /* -n: add #line directives */ | |
177 | static bool symlist; /* -s: output symbol list */ | |
178 | static bool text; /* -t: this is a text file */ | |
179 | ||
180 | static const char *symname[MAXSYMS]; /* symbol name */ | |
181 | static const char *value[MAXSYMS]; /* -Dsym=value */ | |
182 | static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ | |
183 | static int nsyms; /* number of symbols */ | |
184 | ||
185 | static FILE *input; /* input file pointer */ | |
186 | static const char *filename; /* input file name */ | |
187 | static int linenum; /* current line number */ | |
188 | ||
189 | static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ | |
190 | static char *keyword; /* used for editing #elif's */ | |
191 | ||
192 | static Comment_state incomment; /* comment parser state */ | |
193 | static Line_state linestate; /* #if line parser state */ | |
194 | static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ | |
195 | static bool ignoring[MAXDEPTH]; /* ignore comments state */ | |
196 | static int stifline[MAXDEPTH]; /* start of current #if */ | |
197 | static int depth; /* current #if nesting */ | |
198 | static int delcount; /* count of deleted lines */ | |
199 | static bool keepthis; /* don't delete constant #if */ | |
200 | ||
201 | static int exitstat; /* program exit status */ | |
202 | ||
203 | static void addsym(bool, bool, char *); | |
204 | static void debug(const char *, ...); | |
205 | static void done(void); | |
206 | static void error(const char *); | |
207 | static int findsym(const char *); | |
208 | static void flushline(bool); | |
d15bd106 | 209 | static Linetype get_line(void); |
01f1c879 SR |
210 | static Linetype ifeval(const char **); |
211 | static void ignoreoff(void); | |
212 | static void ignoreon(void); | |
213 | static void keywordedit(const char *); | |
214 | static void nest(void); | |
215 | static void process(void); | |
216 | static const char *skipcomment(const char *); | |
217 | static const char *skipsym(const char *); | |
218 | static void state(Ifstate); | |
219 | static int strlcmp(const char *, const char *, size_t); | |
220 | static void unnest(void); | |
221 | static void usage(void); | |
222 | ||
/*
 * True when c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit nor an underscore.  The argument is fully
 * parenthesized so that compound expressions (for example a
 * conditional expression) are compared as a whole; it is still
 * evaluated twice, so it must be free of side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
224 | ||
225 | /* | |
226 | * The main program. | |
227 | */ | |
228 | int | |
229 | main(int argc, char *argv[]) | |
230 | { | |
231 | int opt; | |
232 | ||
233 | while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1) | |
234 | switch (opt) { | |
235 | case 'i': /* treat stuff controlled by these symbols as text */ | |
236 | /* | |
237 | * For strict backwards-compatibility the U or D | |
238 | * should be immediately after the -i but it doesn't | |
239 | * matter much if we relax that requirement. | |
240 | */ | |
241 | opt = *optarg++; | |
242 | if (opt == 'D') | |
243 | addsym(true, true, optarg); | |
244 | else if (opt == 'U') | |
245 | addsym(true, false, optarg); | |
246 | else | |
247 | usage(); | |
248 | break; | |
249 | case 'D': /* define a symbol */ | |
250 | addsym(false, true, optarg); | |
251 | break; | |
252 | case 'U': /* undef a symbol */ | |
253 | addsym(false, false, optarg); | |
254 | break; | |
255 | case 'I': | |
256 | /* no-op for compatibility with cpp */ | |
257 | break; | |
258 | case 'c': /* treat -D as -U and vice versa */ | |
259 | complement = true; | |
260 | break; | |
261 | case 'd': | |
262 | debugging = true; | |
263 | break; | |
264 | case 'e': /* fewer errors from dodgy lines */ | |
265 | iocccok = true; | |
266 | break; | |
267 | case 'k': /* process constant #ifs */ | |
268 | killconsts = true; | |
269 | break; | |
270 | case 'l': /* blank deleted lines instead of omitting them */ | |
271 | lnblank = true; | |
272 | break; | |
273 | case 'n': /* add #line directive after deleted lines */ | |
274 | lnnum = true; | |
275 | break; | |
276 | case 's': /* only output list of symbols that control #ifs */ | |
277 | symlist = true; | |
278 | break; | |
279 | case 't': /* don't parse C comments */ | |
280 | text = true; | |
281 | break; | |
282 | default: | |
283 | usage(); | |
284 | } | |
285 | argc -= optind; | |
286 | argv += optind; | |
287 | if (argc > 1) { | |
288 | errx(2, "can only do one file"); | |
289 | } else if (argc == 1 && strcmp(*argv, "-") != 0) { | |
290 | filename = *argv; | |
291 | input = fopen(filename, "r"); | |
292 | if (input == NULL) | |
293 | err(2, "can't open %s", filename); | |
294 | } else { | |
295 | filename = "[stdin]"; | |
296 | input = stdin; | |
297 | } | |
298 | process(); | |
299 | abort(); /* bug */ | |
300 | } | |
301 | ||
/*
 * Print the command synopsis on standard error and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklnst] [-Ipath]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
309 | ||
310 | /* | |
311 | * A state transition function alters the global #if processing state | |
312 | * in a particular way. The table below is indexed by the current | |
313 | * processing state and the type of the current line. | |
314 | * | |
315 | * Nesting is handled by keeping a stack of states; some transition | |
316 | * functions increase or decrease the depth. They also maintain the | |
317 | * ignore state on a stack. In some complicated cases they have to | |
318 | * alter the preprocessor directive, as follows. | |
319 | * | |
320 | * When we have processed a group that starts off with a known-false | |
321 | * #if/#elif sequence (which has therefore been deleted) followed by a | |
322 | * #elif that we don't understand and therefore must keep, we edit the | |
323 | * latter into a #if to keep the nesting correct. | |
324 | * | |
325 | * When we find a true #elif in a group, the following block will | |
326 | * always be kept and the rest of the sequence after the next #elif or | |
327 | * #else will be discarded. We edit the #elif into a #else and the | |
328 | * following directive to #endif since this has the desired behaviour. | |
329 | * | |
330 | * "Dodgy" directives are split across multiple lines, the most common | |
331 | * example being a multi-line comment hanging off the right of the | |
332 | * directive. We can handle them correctly only if there is no change | |
333 | * from printing to dropping (or vice versa) caused by that directive. | |
334 | * If the directive is the first of a group we have a choice between | |
335 | * failing with an error, or passing it through unchanged instead of | |
336 | * evaluating it. The latter is not the default to avoid questions from | |
337 | * users about unifdef unexpectedly leaving behind preprocessor directives. | |
338 | */ | |
339 | typedef void state_fn(void); | |
340 | ||
341 | /* report an error */ | |
342 | static void Eelif (void) { error("Inappropriate #elif"); } | |
343 | static void Eelse (void) { error("Inappropriate #else"); } | |
344 | static void Eendif(void) { error("Inappropriate #endif"); } | |
345 | static void Eeof (void) { error("Premature EOF"); } | |
346 | static void Eioccc(void) { error("Obfuscated preprocessor control line"); } | |
347 | /* plain line handling */ | |
348 | static void print (void) { flushline(true); } | |
349 | static void drop (void) { flushline(false); } | |
350 | /* output lacks group's start line */ | |
351 | static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } | |
352 | static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } | |
353 | static void Selse (void) { drop(); state(IS_TRUE_ELSE); } | |
354 | /* print/pass this block */ | |
355 | static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } | |
356 | static void Pelse (void) { print(); state(IS_PASS_ELSE); } | |
357 | static void Pendif(void) { print(); unnest(); } | |
358 | /* discard this block */ | |
359 | static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } | |
360 | static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } | |
361 | static void Delse (void) { drop(); state(IS_FALSE_ELSE); } | |
362 | static void Dendif(void) { drop(); unnest(); } | |
363 | /* first line of group */ | |
364 | static void Fdrop (void) { nest(); Dfalse(); } | |
365 | static void Fpass (void) { nest(); Pelif(); } | |
366 | static void Ftrue (void) { nest(); Strue(); } | |
367 | static void Ffalse(void) { nest(); Sfalse(); } | |
368 | /* variable pedantry for obfuscated lines */ | |
369 | static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); } | |
370 | static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); } | |
371 | static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); } | |
372 | /* ignore comments in this block */ | |
373 | static void Idrop (void) { Fdrop(); ignoreon(); } | |
374 | static void Itrue (void) { Ftrue(); ignoreon(); } | |
375 | static void Ifalse(void) { Ffalse(); ignoreon(); } | |
376 | /* edit this line */ | |
377 | static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } | |
378 | static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } | |
379 | static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } | |
380 | static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } | |
381 | ||
382 | static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { | |
383 | /* IS_OUTSIDE */ | |
384 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, | |
385 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, | |
386 | print, done }, | |
387 | /* IS_FALSE_PREFIX */ | |
388 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, | |
389 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, | |
390 | drop, Eeof }, | |
391 | /* IS_TRUE_PREFIX */ | |
392 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, | |
393 | Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, | |
394 | print, Eeof }, | |
395 | /* IS_PASS_MIDDLE */ | |
396 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, | |
397 | Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, | |
398 | print, Eeof }, | |
399 | /* IS_FALSE_MIDDLE */ | |
400 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, | |
401 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, | |
402 | drop, Eeof }, | |
403 | /* IS_TRUE_MIDDLE */ | |
404 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, | |
405 | Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, | |
406 | print, Eeof }, | |
407 | /* IS_PASS_ELSE */ | |
408 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, | |
409 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, | |
410 | print, Eeof }, | |
411 | /* IS_FALSE_ELSE */ | |
412 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, | |
413 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, | |
414 | drop, Eeof }, | |
415 | /* IS_TRUE_ELSE */ | |
416 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, | |
417 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, | |
418 | print, Eeof }, | |
419 | /* IS_FALSE_TRAILER */ | |
420 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, | |
421 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, | |
422 | drop, Eeof } | |
423 | /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF | |
424 | TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) | |
425 | PLAIN EOF */ | |
426 | }; | |
427 | ||
428 | /* | |
429 | * State machine utility functions | |
430 | */ | |
431 | static void | |
432 | done(void) | |
433 | { | |
434 | if (incomment) | |
435 | error("EOF in comment"); | |
436 | exit(exitstat); | |
437 | } | |
438 | static void | |
439 | ignoreoff(void) | |
440 | { | |
441 | if (depth == 0) | |
442 | abort(); /* bug */ | |
443 | ignoring[depth] = ignoring[depth-1]; | |
444 | } | |
445 | static void | |
446 | ignoreon(void) | |
447 | { | |
448 | ignoring[depth] = true; | |
449 | } | |
450 | static void | |
451 | keywordedit(const char *replacement) | |
452 | { | |
14a036d2 SR |
453 | size_t size = tline + sizeof(tline) - keyword; |
454 | char *dst = keyword; | |
455 | const char *src = replacement; | |
456 | if (size != 0) { | |
457 | while ((--size != 0) && (*src != '\0')) | |
458 | *dst++ = *src++; | |
459 | *dst = '\0'; | |
460 | } | |
01f1c879 SR |
461 | print(); |
462 | } | |
463 | static void | |
464 | nest(void) | |
465 | { | |
466 | depth += 1; | |
467 | if (depth >= MAXDEPTH) | |
468 | error("Too many levels of nesting"); | |
469 | stifline[depth] = linenum; | |
470 | } | |
471 | static void | |
472 | unnest(void) | |
473 | { | |
474 | if (depth == 0) | |
475 | abort(); /* bug */ | |
476 | depth -= 1; | |
477 | } | |
478 | static void | |
479 | state(Ifstate is) | |
480 | { | |
481 | ifstate[depth] = is; | |
482 | } | |
483 | ||
484 | /* | |
485 | * Write a line to the output or not, according to command line options. | |
486 | */ | |
487 | static void | |
488 | flushline(bool keep) | |
489 | { | |
490 | if (symlist) | |
491 | return; | |
492 | if (keep ^ complement) { | |
493 | if (lnnum && delcount > 0) | |
494 | printf("#line %d\n", linenum); | |
495 | fputs(tline, stdout); | |
496 | delcount = 0; | |
497 | } else { | |
498 | if (lnblank) | |
499 | putc('\n', stdout); | |
500 | exitstat = 1; | |
501 | delcount += 1; | |
502 | } | |
503 | } | |
504 | ||
505 | /* | |
506 | * The driver for the state machine. | |
507 | */ | |
508 | static void | |
509 | process(void) | |
510 | { | |
511 | Linetype lineval; | |
512 | ||
513 | for (;;) { | |
514 | linenum++; | |
d15bd106 | 515 | lineval = get_line(); |
01f1c879 SR |
516 | trans_table[ifstate[depth]][lineval](); |
517 | debug("process %s -> %s depth %d", | |
518 | linetype_name[lineval], | |
519 | ifstate_name[ifstate[depth]], depth); | |
520 | } | |
521 | } | |
522 | ||
523 | /* | |
524 | * Parse a line and determine its type. We keep the preprocessor line | |
525 | * parser state between calls in the global variable linestate, with | |
526 | * help from skipcomment(). | |
527 | */ | |
528 | static Linetype | |
d15bd106 | 529 | get_line(void) |
01f1c879 SR |
530 | { |
531 | const char *cp; | |
532 | int cursym; | |
533 | int kwlen; | |
534 | Linetype retval; | |
535 | Comment_state wascomment; | |
536 | ||
537 | if (fgets(tline, MAXLINE, input) == NULL) | |
538 | return (LT_EOF); | |
539 | retval = LT_PLAIN; | |
540 | wascomment = incomment; | |
541 | cp = skipcomment(tline); | |
542 | if (linestate == LS_START) { | |
543 | if (*cp == '#') { | |
544 | linestate = LS_HASH; | |
545 | cp = skipcomment(cp + 1); | |
546 | } else if (*cp != '\0') | |
547 | linestate = LS_DIRTY; | |
548 | } | |
549 | if (!incomment && linestate == LS_HASH) { | |
550 | keyword = tline + (cp - tline); | |
551 | cp = skipsym(cp); | |
552 | kwlen = cp - keyword; | |
553 | /* no way can we deal with a continuation inside a keyword */ | |
554 | if (strncmp(cp, "\\\n", 2) == 0) | |
555 | Eioccc(); | |
556 | if (strlcmp("ifdef", keyword, kwlen) == 0 || | |
557 | strlcmp("ifndef", keyword, kwlen) == 0) { | |
558 | cp = skipcomment(cp); | |
559 | if ((cursym = findsym(cp)) < 0) | |
560 | retval = LT_IF; | |
561 | else { | |
562 | retval = (keyword[2] == 'n') | |
563 | ? LT_FALSE : LT_TRUE; | |
564 | if (value[cursym] == NULL) | |
565 | retval = (retval == LT_TRUE) | |
566 | ? LT_FALSE : LT_TRUE; | |
567 | if (ignore[cursym]) | |
568 | retval = (retval == LT_TRUE) | |
569 | ? LT_TRUEI : LT_FALSEI; | |
570 | } | |
571 | cp = skipsym(cp); | |
572 | } else if (strlcmp("if", keyword, kwlen) == 0) | |
573 | retval = ifeval(&cp); | |
574 | else if (strlcmp("elif", keyword, kwlen) == 0) | |
575 | retval = ifeval(&cp) - LT_IF + LT_ELIF; | |
576 | else if (strlcmp("else", keyword, kwlen) == 0) | |
577 | retval = LT_ELSE; | |
578 | else if (strlcmp("endif", keyword, kwlen) == 0) | |
579 | retval = LT_ENDIF; | |
580 | else { | |
581 | linestate = LS_DIRTY; | |
582 | retval = LT_PLAIN; | |
583 | } | |
584 | cp = skipcomment(cp); | |
585 | if (*cp != '\0') { | |
586 | linestate = LS_DIRTY; | |
587 | if (retval == LT_TRUE || retval == LT_FALSE || | |
588 | retval == LT_TRUEI || retval == LT_FALSEI) | |
589 | retval = LT_IF; | |
590 | if (retval == LT_ELTRUE || retval == LT_ELFALSE) | |
591 | retval = LT_ELIF; | |
592 | } | |
593 | if (retval != LT_PLAIN && (wascomment || incomment)) { | |
594 | retval += LT_DODGY; | |
595 | if (incomment) | |
596 | linestate = LS_DIRTY; | |
597 | } | |
598 | /* skipcomment should have changed the state */ | |
599 | if (linestate == LS_HASH) | |
600 | abort(); /* bug */ | |
601 | } | |
602 | if (linestate == LS_DIRTY) { | |
603 | while (*cp != '\0') | |
604 | cp = skipcomment(cp + 1); | |
605 | } | |
606 | debug("parser %s comment %s line", | |
607 | comment_name[incomment], linestate_name[linestate]); | |
608 | return (retval); | |
609 | } | |
610 | ||
/*
 * The binary operators understood by the expression evaluator.  Each
 * one returns 1 when the relation holds and 0 otherwise.  Note that if
 * support for division is ever added, the boolean operators will also
 * need to short-circuit because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return (a < b ? 1 : 0);
}

static int
op_gt(int a, int b)
{
	return (a > b ? 1 : 0);
}

static int
op_le(int a, int b)
{
	return (a <= b ? 1 : 0);
}

static int
op_ge(int a, int b)
{
	return (a >= b ? 1 : 0);
}

static int
op_eq(int a, int b)
{
	return (a == b ? 1 : 0);
}

static int
op_ne(int a, int b)
{
	return (a != b ? 1 : 0);
}

static int
op_or(int a, int b)
{
	return ((a != 0 || b != 0) ? 1 : 0);
}

static int
op_and(int a, int b)
{
	return ((a != 0 && b != 0) ? 1 : 0);
}
624 | ||
625 | /* | |
626 | * An evaluation function takes three arguments, as follows: (1) a pointer to | |
627 | * an element of the precedence table which lists the operators at the current | |
628 | * level of precedence; (2) a pointer to an integer which will receive the | |
629 | * value of the expression; and (3) a pointer to a char* that points to the | |
630 | * expression to be evaluated and that is updated to the end of the expression | |
631 | * when evaluation is complete. The function returns LT_FALSE if the value of | |
632 | * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the | |
633 | * expression could not be evaluated. | |
634 | */ | |
635 | struct ops; | |
636 | ||
637 | typedef Linetype eval_fn(const struct ops *, int *, const char **); | |
638 | ||
639 | static eval_fn eval_table, eval_unary; | |
640 | ||
641 | /* | |
642 | * The precedence table. Expressions involving binary operators are evaluated | |
643 | * in a table-driven way by eval_table. When it evaluates a subexpression it | |
644 | * calls the inner function with its first argument pointing to the next | |
645 | * element of the table. Innermost expressions have special non-table-driven | |
646 | * handling. | |
647 | */ | |
648 | static const struct ops { | |
649 | eval_fn *inner; | |
650 | struct op { | |
651 | const char *str; | |
652 | int (*fn)(int, int); | |
653 | } op[5]; | |
654 | } eval_ops[] = { | |
655 | { eval_table, { { "||", op_or } } }, | |
656 | { eval_table, { { "&&", op_and } } }, | |
657 | { eval_table, { { "==", op_eq }, | |
658 | { "!=", op_ne } } }, | |
659 | { eval_unary, { { "<=", op_le }, | |
660 | { ">=", op_ge }, | |
661 | { "<", op_lt }, | |
662 | { ">", op_gt } } } | |
663 | }; | |
664 | ||
665 | /* | |
666 | * Function for evaluating the innermost parts of expressions, | |
667 | * viz. !expr (expr) defined(symbol) symbol number | |
668 | * We reset the keepthis flag when we find a non-constant subexpression. | |
669 | */ | |
670 | static Linetype | |
671 | eval_unary(const struct ops *ops, int *valp, const char **cpp) | |
672 | { | |
673 | const char *cp; | |
674 | char *ep; | |
675 | int sym; | |
676 | ||
677 | cp = skipcomment(*cpp); | |
678 | if (*cp == '!') { | |
679 | debug("eval%d !", ops - eval_ops); | |
680 | cp++; | |
eedc9d83 RK |
681 | if (eval_unary(ops, valp, &cp) == LT_IF) { |
682 | *cpp = cp; | |
01f1c879 | 683 | return (LT_IF); |
eedc9d83 | 684 | } |
01f1c879 SR |
685 | *valp = !*valp; |
686 | } else if (*cp == '(') { | |
687 | cp++; | |
688 | debug("eval%d (", ops - eval_ops); | |
689 | if (eval_table(eval_ops, valp, &cp) == LT_IF) | |
690 | return (LT_IF); | |
691 | cp = skipcomment(cp); | |
692 | if (*cp++ != ')') | |
693 | return (LT_IF); | |
694 | } else if (isdigit((unsigned char)*cp)) { | |
695 | debug("eval%d number", ops - eval_ops); | |
696 | *valp = strtol(cp, &ep, 0); | |
697 | cp = skipsym(cp); | |
698 | } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { | |
699 | cp = skipcomment(cp+7); | |
700 | debug("eval%d defined", ops - eval_ops); | |
701 | if (*cp++ != '(') | |
702 | return (LT_IF); | |
703 | cp = skipcomment(cp); | |
704 | sym = findsym(cp); | |
01f1c879 SR |
705 | cp = skipsym(cp); |
706 | cp = skipcomment(cp); | |
707 | if (*cp++ != ')') | |
708 | return (LT_IF); | |
eedc9d83 RK |
709 | if (sym >= 0) |
710 | *valp = (value[sym] != NULL); | |
711 | else { | |
712 | *cpp = cp; | |
713 | return (LT_IF); | |
714 | } | |
01f1c879 SR |
715 | keepthis = false; |
716 | } else if (!endsym(*cp)) { | |
717 | debug("eval%d symbol", ops - eval_ops); | |
718 | sym = findsym(cp); | |
719 | if (sym < 0) | |
720 | return (LT_IF); | |
721 | if (value[sym] == NULL) | |
722 | *valp = 0; | |
723 | else { | |
724 | *valp = strtol(value[sym], &ep, 0); | |
725 | if (*ep != '\0' || ep == value[sym]) | |
726 | return (LT_IF); | |
727 | } | |
728 | cp = skipsym(cp); | |
729 | keepthis = false; | |
730 | } else { | |
731 | debug("eval%d bad expr", ops - eval_ops); | |
732 | return (LT_IF); | |
733 | } | |
734 | ||
735 | *cpp = cp; | |
736 | debug("eval%d = %d", ops - eval_ops, *valp); | |
737 | return (*valp ? LT_TRUE : LT_FALSE); | |
738 | } | |
739 | ||
740 | /* | |
741 | * Table-driven evaluation of binary operators. | |
742 | */ | |
743 | static Linetype | |
744 | eval_table(const struct ops *ops, int *valp, const char **cpp) | |
745 | { | |
746 | const struct op *op; | |
747 | const char *cp; | |
748 | int val; | |
eedc9d83 | 749 | Linetype lhs, rhs; |
01f1c879 SR |
750 | |
751 | debug("eval%d", ops - eval_ops); | |
752 | cp = *cpp; | |
eedc9d83 | 753 | lhs = ops->inner(ops+1, valp, &cp); |
01f1c879 SR |
754 | for (;;) { |
755 | cp = skipcomment(cp); | |
756 | for (op = ops->op; op->str != NULL; op++) | |
757 | if (strncmp(cp, op->str, strlen(op->str)) == 0) | |
758 | break; | |
759 | if (op->str == NULL) | |
760 | break; | |
761 | cp += strlen(op->str); | |
762 | debug("eval%d %s", ops - eval_ops, op->str); | |
eedc9d83 RK |
763 | rhs = ops->inner(ops+1, &val, &cp); |
764 | if (op->fn == op_and && (lhs == LT_FALSE || rhs == LT_FALSE)) { | |
765 | debug("eval%d: and always false", ops - eval_ops); | |
766 | if (lhs == LT_IF) | |
767 | *valp = val; | |
768 | lhs = LT_FALSE; | |
769 | continue; | |
770 | } | |
771 | if (op->fn == op_or && (lhs == LT_TRUE || rhs == LT_TRUE)) { | |
772 | debug("eval%d: or always true", ops - eval_ops); | |
773 | if (lhs == LT_IF) | |
774 | *valp = val; | |
775 | lhs = LT_TRUE; | |
776 | continue; | |
777 | } | |
778 | if (rhs == LT_IF) | |
779 | lhs = LT_IF; | |
780 | if (lhs != LT_IF) | |
781 | *valp = op->fn(*valp, val); | |
01f1c879 SR |
782 | } |
783 | ||
784 | *cpp = cp; | |
785 | debug("eval%d = %d", ops - eval_ops, *valp); | |
eedc9d83 RK |
786 | if (lhs != LT_IF) |
787 | lhs = (*valp ? LT_TRUE : LT_FALSE); | |
788 | return lhs; | |
01f1c879 SR |
789 | } |
790 | ||
791 | /* | |
792 | * Evaluate the expression on a #if or #elif line. If we can work out | |
793 | * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we | |
794 | * return just a generic LT_IF. | |
795 | */ | |
796 | static Linetype | |
797 | ifeval(const char **cpp) | |
798 | { | |
eedc9d83 | 799 | const char *cp = *cpp; |
01f1c879 SR |
800 | int ret; |
801 | int val; | |
802 | ||
803 | debug("eval %s", *cpp); | |
804 | keepthis = killconsts ? false : true; | |
eedc9d83 RK |
805 | ret = eval_table(eval_ops, &val, &cp); |
806 | if (ret != LT_IF) | |
807 | *cpp = cp; | |
01f1c879 SR |
808 | debug("eval = %d", val); |
809 | return (keepthis ? LT_IF : ret); | |
810 | } | |
811 | ||
812 | /* | |
813 | * Skip over comments, strings, and character literals and stop at the | |
814 | * next character position that is not whitespace. Between calls we keep | |
815 | * the comment state in the global variable incomment, and we also adjust | |
816 | * the global variable linestate when we see a newline. | |
817 | * XXX: doesn't cope with the buffer splitting inside a state transition. | |
818 | */ | |
819 | static const char * | |
820 | skipcomment(const char *cp) | |
821 | { | |
822 | if (text || ignoring[depth]) { | |
823 | for (; isspace((unsigned char)*cp); cp++) | |
824 | if (*cp == '\n') | |
825 | linestate = LS_START; | |
826 | return (cp); | |
827 | } | |
828 | while (*cp != '\0') | |
829 | /* don't reset to LS_START after a line continuation */ | |
830 | if (strncmp(cp, "\\\n", 2) == 0) | |
831 | cp += 2; | |
832 | else switch (incomment) { | |
833 | case NO_COMMENT: | |
834 | if (strncmp(cp, "/\\\n", 3) == 0) { | |
835 | incomment = STARTING_COMMENT; | |
836 | cp += 3; | |
837 | } else if (strncmp(cp, "/*", 2) == 0) { | |
838 | incomment = C_COMMENT; | |
839 | cp += 2; | |
840 | } else if (strncmp(cp, "//", 2) == 0) { | |
841 | incomment = CXX_COMMENT; | |
842 | cp += 2; | |
843 | } else if (strncmp(cp, "\'", 1) == 0) { | |
844 | incomment = CHAR_LITERAL; | |
845 | linestate = LS_DIRTY; | |
846 | cp += 1; | |
847 | } else if (strncmp(cp, "\"", 1) == 0) { | |
848 | incomment = STRING_LITERAL; | |
849 | linestate = LS_DIRTY; | |
850 | cp += 1; | |
851 | } else if (strncmp(cp, "\n", 1) == 0) { | |
852 | linestate = LS_START; | |
853 | cp += 1; | |
854 | } else if (strchr(" \t", *cp) != NULL) { | |
855 | cp += 1; | |
856 | } else | |
857 | return (cp); | |
858 | continue; | |
859 | case CXX_COMMENT: | |
860 | if (strncmp(cp, "\n", 1) == 0) { | |
861 | incomment = NO_COMMENT; | |
862 | linestate = LS_START; | |
863 | } | |
864 | cp += 1; | |
865 | continue; | |
866 | case CHAR_LITERAL: | |
867 | case STRING_LITERAL: | |
868 | if ((incomment == CHAR_LITERAL && cp[0] == '\'') || | |
869 | (incomment == STRING_LITERAL && cp[0] == '\"')) { | |
870 | incomment = NO_COMMENT; | |
871 | cp += 1; | |
872 | } else if (cp[0] == '\\') { | |
873 | if (cp[1] == '\0') | |
874 | cp += 1; | |
875 | else | |
876 | cp += 2; | |
877 | } else if (strncmp(cp, "\n", 1) == 0) { | |
878 | if (incomment == CHAR_LITERAL) | |
879 | error("unterminated char literal"); | |
880 | else | |
881 | error("unterminated string literal"); | |
882 | } else | |
883 | cp += 1; | |
884 | continue; | |
885 | case C_COMMENT: | |
886 | if (strncmp(cp, "*\\\n", 3) == 0) { | |
887 | incomment = FINISHING_COMMENT; | |
888 | cp += 3; | |
889 | } else if (strncmp(cp, "*/", 2) == 0) { | |
890 | incomment = NO_COMMENT; | |
891 | cp += 2; | |
892 | } else | |
893 | cp += 1; | |
894 | continue; | |
895 | case STARTING_COMMENT: | |
896 | if (*cp == '*') { | |
897 | incomment = C_COMMENT; | |
898 | cp += 1; | |
899 | } else if (*cp == '/') { | |
900 | incomment = CXX_COMMENT; | |
901 | cp += 1; | |
902 | } else { | |
903 | incomment = NO_COMMENT; | |
904 | linestate = LS_DIRTY; | |
905 | } | |
906 | continue; | |
907 | case FINISHING_COMMENT: | |
908 | if (*cp == '/') { | |
909 | incomment = NO_COMMENT; | |
910 | cp += 1; | |
911 | } else | |
912 | incomment = C_COMMENT; | |
913 | continue; | |
914 | default: | |
915 | abort(); /* bug */ | |
916 | } | |
917 | return (cp); | |
918 | } | |
919 | ||
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
930 | ||
931 | /* | |
932 | * Look for the symbol in the symbol table. If is is found, we return | |
933 | * the symbol table index, else we return -1. | |
934 | */ | |
935 | static int | |
936 | findsym(const char *str) | |
937 | { | |
938 | const char *cp; | |
939 | int symind; | |
940 | ||
941 | cp = skipsym(str); | |
942 | if (cp == str) | |
943 | return (-1); | |
944 | if (symlist) { | |
945 | printf("%.*s\n", (int)(cp-str), str); | |
946 | /* we don't care about the value of the symbol */ | |
947 | return (0); | |
948 | } | |
949 | for (symind = 0; symind < nsyms; ++symind) { | |
950 | if (strlcmp(symname[symind], str, cp-str) == 0) { | |
951 | debug("findsym %s %s", symname[symind], | |
952 | value[symind] ? value[symind] : ""); | |
953 | return (symind); | |
954 | } | |
955 | } | |
956 | return (-1); | |
957 | } | |
958 | ||
959 | /* | |
960 | * Add a symbol to the symbol table. | |
961 | */ | |
962 | static void | |
963 | addsym(bool ignorethis, bool definethis, char *sym) | |
964 | { | |
965 | int symind; | |
966 | char *val; | |
967 | ||
968 | symind = findsym(sym); | |
969 | if (symind < 0) { | |
970 | if (nsyms >= MAXSYMS) | |
971 | errx(2, "too many symbols"); | |
972 | symind = nsyms++; | |
973 | } | |
974 | symname[symind] = sym; | |
975 | ignore[symind] = ignorethis; | |
976 | val = sym + (skipsym(sym) - sym); | |
977 | if (definethis) { | |
978 | if (*val == '=') { | |
979 | value[symind] = val+1; | |
980 | *val = '\0'; | |
981 | } else if (*val == '\0') | |
982 | value[symind] = ""; | |
983 | else | |
984 | usage(); | |
985 | } else { | |
986 | if (*val != '\0') | |
987 | usage(); | |
988 | value[symind] = NULL; | |
989 | } | |
990 | } | |
991 | ||
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0', i.e.
 * the result is 0 only when the whole of s matches the compared prefix
 * of t.  The comparison also stops early if t ends before n characters.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	/* non-zero here means s is longer than the prefix we compared */
	return ((unsigned char)s[i]);
}
1006 | ||
1007 | /* | |
1008 | * Diagnostics. | |
1009 | */ | |
1010 | static void | |
1011 | debug(const char *msg, ...) | |
1012 | { | |
1013 | va_list ap; | |
1014 | ||
1015 | if (debugging) { | |
1016 | va_start(ap, msg); | |
1017 | vwarnx(msg, ap); | |
1018 | va_end(ap); | |
1019 | } | |
1020 | } | |
1021 | ||
1022 | static void | |
1023 | error(const char *msg) | |
1024 | { | |
1025 | if (depth == 0) | |
1026 | warnx("%s: %d: %s", filename, linenum, msg); | |
1027 | else | |
1028 | warnx("%s: %d: %s (#if line %d depth %d)", | |
1029 | filename, linenum, msg, stifline[depth], depth); | |
1030 | errx(2, "output may be truncated"); | |
1031 | } |