From harvard!uucp Mon Nov 18 20:02:06 1985 Received: from harvard.HARVARD.EDU by seismo.CSS.GOV with SMTP; Mon, 18 Nov 85 19:09:14 EST Received: by harvard.HARVARD.EDU; Mon, 18 Nov 85 19:09:22 EST From: harvard!uucp (Black Hole) Return-Path: Received: by panda.LOCAL on Mon, 18 Nov 85 18:36:13 est Date: Mon, 18 Nov 85 18:36:13 est Message-Id: <8511182336.AA22105@panda.LOCAL> To: talcott!seismo!rick Subject: Cpp (part 3 of 3) Status: R From: decvax!minow Date: Tue, 8 Jan 85 00:05:58 est Subject: cpp3.arc -h- cpp4.c Mon Jan 7 23:59:34 1985 cpp4.c /* * C P P 4 . C * M a c r o D e f i n i t i o n s * * Edit History * 31-Aug-84 MM USENET net.sources release * 04-Oct-84 MM __LINE__ and __FILE__ must call ungetstring() * so they work correctly with token concatenation. * Added string formal recognition. * 25-Oct-84 MM "Short-circuit" evaluate #if's so that we * don't print unnecessary error messages for * #if !defined(FOO) && FOO != 0 && 10 / FOO ... * 31-Oct-84 ado/MM Added token concatenation * 6-Nov-84 MM Split off eval stuff */ #include #include #include "cppdef.h" #include "cpp.h" /* * parm[], parmp, and parlist[] are used to store #define() argument * lists. nargs contains the actual number of parameters stored. */ static char parm[NPARMWORK + 1]; /* define param work buffer */ static char *parmp; /* Free space in parm */ static char *parlist[LASTPARM]; /* -> start of each parameter */ static int nargs; /* Parameters for this macro */ dodefine() /* * Called from control when a #define is scanned. This module * parses formal parameters and the replacement string. When * the formal parameter name is encountered in the replacement * string, it is replaced by a character in the range 128 to * 128+NPARAM (this allows up to 32 parameters within the * Dec Multinational range). If cpp is ported to an EBCDIC * machine, you will have to make other arrangements. 
* * There is some special case code to distinguish * #define foo bar * from #define foo() bar * * Also, we make sure that * #define foo foo * expands to "foo" but doesn't put cpp into an infinite loop. * * A warning message is printed if you redefine a symbol to a * different text. I.e, * #define foo 123 * #define foo 123 * is ok, but * #define foo 123 * #define foo +123 * is not. * * The following subroutines are called from define(): * checkparm called when a token is scanned. It checks through the * array of formal parameters. If a match is found, the * token is replaced by a control byte which will be used * to locate the parameter when the macro is expanded. * textput puts a string in the macro work area (parm[]), updating * parmp to point to the first free byte in parm[]. * textput() tests for work buffer overflow. * charput puts a single character in the macro work area (parm[]) * in a manner analogous to textput(). */ { register int c; register DEFBUF *dp; /* -> new definition */ int isredefine; /* TRUE if redefined */ char *old; /* Remember redefined */ extern int save(); /* Save char in work[] */ if (type[(c = skipws())] != LET) goto bad_define; isredefine = FALSE; /* Set if redefining */ if ((dp = lookid(c)) == NULL) /* If not known now */ dp = defendel(token, FALSE); /* Save the name */ else { /* It's known: */ isredefine = TRUE; /* Remember this fact */ old = dp->repl; /* Remember replacement */ dp->repl = NULL; /* No replacement now */ } parlist[0] = parmp = parm; /* Setup parm buffer */ if ((c = get()) == '(') { /* With arguments? 
*/ nargs = 0; /* Init formals counter */ do { /* Collect formal parms */ if (nargs >= LASTPARM) cfatal("Too many arguments for macro", NULLST); else if ((c = skipws()) == ')') break; /* Got them all */ else if (type[c] != LET) /* Bad formal syntax */ goto bad_define; scanid(c); /* Get the formal param */ parlist[nargs++] = parmp; /* Save its start */ textput(token); /* Save text in parm[] */ } while ((c = skipws()) == ','); /* Get another argument */ if (c != ')') /* Must end at ) */ goto bad_define; c = ' '; /* Will skip to body */ } else { /* * DEF_NOARGS is needed to distinguish between * "#define foo" and "#define foo()". */ nargs = DEF_NOARGS; /* No () parameters */ } if (type[c] == SPA) /* At whitespace? */ c = skipws(); /* Not any more. */ workp = work; /* Replacement put here */ inmacro = TRUE; /* Keep \ now */ while (c != EOF_CHAR && c != '\n') { /* Compile macro body */ #if OK_CONCAT if (c == '#') { /* Token concatenation? */ while (workp > work && type[workp[-1]] == SPA) --workp; /* Erase leading spaces */ save(TOK_SEP); /* Stuff a delimiter */ c = skipws(); /* Eat whitespace */ if (type[c] == LET) /* Another token here? */ ; /* Stuff it normally */ else if (type[c] == DIG) { /* Digit string after? */ while (type[c] == DIG) { /* Stuff the digits */ save(c); c = get(); } save(TOK_SEP); /* Delimit 2nd token */ } else { ciwarn("Strange character after # (%d.)", c); } continue; } #endif switch (type[c]) { case LET: checkparm(c, dp); /* Might be a formal */ break; case DIG: /* Number in mac. body */ case DOT: /* Maybe a float number */ scannumber(c, save); /* Scan it off */ break; case QUO: /* String in mac. body */ #if STRING_FORMAL stparmscan(c, dp); /* Do string magic */ #else stparmscan(c); #endif break; case BSH: /* Backslash */ save('\\'); if ((c = get()) == '\n') wrongline = TRUE; save(c); break; case SPA: /* Absorb whitespace */ /* * Note: the "end of comment" marker is passed on * to allow comments to separate tokens. 
*/ if (workp[-1] == ' ') /* Absorb multiple */ break; /* spaces */ else if (c == '\t') c = ' '; /* Normalize tabs */ /* Fall through to store character */ default: /* Other character */ save(c); break; } c = get(); } inmacro = FALSE; /* Stop newline hack */ unget(); /* For control check */ if (workp > work && workp[-1] == ' ') /* Drop trailing blank */ workp--; *workp = EOS; /* Terminate work */ dp->repl = savestring(work); /* Save the string */ dp->nargs = nargs; /* Save arg count */ #if DEBUG if (debug) dumpadef("macro definition", dp); #endif if (isredefine) { /* Error if redefined */ if ((old != NULL && dp->repl != NULL && !streq(old, dp->repl)) || (old == NULL && dp->repl != NULL) || (old != NULL && dp->repl == NULL)) { cerror("Redefining defined variable \"%s\"", dp->name); } if (old != NULL) /* We don't need the */ free(old); /* old definition now. */ } return; bad_define: cerror("#define syntax error", NULLST); inmacro = FALSE; /* Stop hack */ } checkparm(c, dp) register int c; DEFBUF *dp; /* * Replace this param if it's defined. Note that the macro name is a * possible replacement token. We stuff DEF_MAGIC in front of the token * which is treated as a LETTER by the token scanner and eaten by * the output routine. This prevents the macro expander from * looping if someone writes "#define foo foo". */ { register int i; register char *cp; scanid(c); /* Get parm to token[] */ for (i = 0; i < nargs; i++) { /* For each argument */ if (streq(parlist[i], token)) { /* If it's known */ save(i + MAC_PARM); /* Save a magic cookie */ return; /* And exit the search */ } } if (streq(dp->name, token)) /* Macro name in body? */ save(DEF_MAGIC); /* Save magic marker */ for (cp = token; *cp != EOS;) /* And save */ save(*cp++); /* The token itself */ } #if STRING_FORMAL stparmscan(delim, dp) int delim; register DEFBUF *dp; /* * Scan the string (starting with the given delimiter). * The token is replaced if it is the only text in this string or * character constant. 
The algorithm follows checkparm() above. * Note that scanstring() has approved of the string. */ { register int c; /* * Warning -- this code hasn't been tested for a while. * It exists only to preserve compatibility with earlier * implementations of cpp. It is not part of the Draft * ANSI Standard C language. */ save(delim); instring = TRUE; while ((c = get()) != delim && c != '\n' && c != EOF_CHAR) { if (type[c] == LET) /* Maybe formal parm */ checkparm(c, dp); else { save(c); if (c == '\\') save(get()); } } instring = FALSE; if (c != delim) cerror("Unterminated string in macro body", NULLST); save(c); } #else stparmscan(delim) int delim; /* * Normal string parameter scan. */ { register char *wp; register int i; extern int save(); wp = workp; /* Here's where it starts */ if (!scanstring(delim, save)) return; /* Exit on scanstring error */ workp[-1] = EOS; /* Erase trailing quote */ wp++; /* -> first string content byte */ for (i = 0; i < nargs; i++) { if (streq(parlist[i], wp)) { *wp++ = MAC_PARM + PAR_MAC; /* Stuff a magic marker */ *wp++ = (i + MAC_PARM); /* Make a formal marker */ *wp = wp[-3]; /* Add on closing quote */ workp = wp + 1; /* Reset string end */ return; } } workp[-1] = wp[-1]; /* Nope, reset end quote. */ } #endif doundef() /* * Remove the symbol from the defined list. * Called from the #control processor. */ { register int c; if (type[(c = skipws())] != LET) cerror("Illegal #undef argument", NULLST); else { scanid(c); /* Get name to token[] */ if (defendel(token, TRUE) == NULL) { cwarn("Symbol \"%s\" not defined in #undef", token); } } } textput(text) char *text; /* * Put the string in the parm[] buffer. */ { register int size; size = strlen(text) + 1; if ((parmp + size) >= &parm[NPARMWORK]) cfatal("Macro work area overflow", NULLST); else { strcpy(parmp, text); parmp += size; } } charput(c) register int c; /* * Put the byte in the parm[] buffer. 
*/ { if (parmp >= &parm[NPARMWORK]) cfatal("Macro work area overflow", NULLST); else { *parmp++ = c; } } /* * M a c r o E x p a n s i o n */ static DEFBUF *macro; /* Catches start of infinite macro */ expand(tokenp) register DEFBUF *tokenp; /* * Expand a macro. Called from the cpp mainline routine (via subroutine * macroid()) when a token is found in the symbol table. It calls * expcollect() to parse actual parameters, checking for the correct number. * It then creates a "file" containing a single line containing the * macro with actual parameters inserted appropriately. This is * "pushed back" onto the input stream. (When the get() routine runs * off the end of the macro line, it will dismiss the macro itself.) */ { register int c; register FILEINFO *file; extern FILEINFO *getfile(); #if DEBUG if (debug) dumpadef("expand entry", tokenp); #endif /* * If no macro is pending, save the name of this macro * for an eventual error message. */ if (recursion++ == 0) macro = tokenp; else if (recursion == RECURSION_LIMIT) { cerror("Recursive macro definition of \"%s\"", tokenp->name); fprintf(stderr, "(Defined by \"%s\")\n", macro->name); if (rec_recover) { do { c = get(); } while (infile != NULL && infile->fp == NULL); unget(); recursion = 0; return; } } /* * Here's a macro to expand. */ nargs = 0; /* Formals counter */ parmp = parm; /* Setup parm buffer */ switch (tokenp->nargs) { case (-2): /* __LINE__ */ sprintf(work, "%d", line); ungetstring(work); break; case (-3): /* __FILE__ */ for (file = infile; file != NULL; file = file->parent) { if (file->fp != NULL) { sprintf(work, "\"%s\"", (file->progname != NULL) ? file->progname : file->filename); ungetstring(work); break; } } break; default: /* * Nothing funny about this macro. */ if (tokenp->nargs < 0) cfatal("Bug: Illegal __ macro \"%s\"", tokenp->name); while ((c = skipws()) == '\n') /* Look for (, skipping */ wrongline = TRUE; /* spaces and newlines */ if (c != '(') { /* * If the programmer writes * #define foo() ... 
* ... * foo [no ()] * just write foo to the output stream. */ unget(); cwarn("Macro \"%s\" needs arguments", tokenp->name); fputs(tokenp->name, stdout); return; } else if (expcollect()) { /* Collect arguments */ if (tokenp->nargs != nargs) { /* Should be an error? */ cwarn("Wrong number of macro arguments for \"%s\"", tokenp->name); } #if DEBUG if (debug) dumpparm("expand"); #endif } /* Collect arguments */ case DEF_NOARGS: /* No parameters just stuffs */ expstuff(tokenp); /* Do actual parameters */ } /* nargs switch */ } FILE_LOCAL int expcollect() /* * Collect the actual parameters for this macro. TRUE if ok. */ { register int c; register int paren; /* For embedded ()'s */ extern int charput(); for (;;) { paren = 0; /* Collect next arg. */ while ((c = skipws()) == '\n') /* Skip over whitespace */ wrongline = TRUE; /* and newlines. */ if (c == ')') { /* At end of all args? */ /* * Note that there is a guard byte in parm[] * so we don't have to check for overflow here. */ *parmp = EOS; /* Make sure terminated */ break; /* Exit collection loop */ } else if (nargs >= LASTPARM) cfatal("Too many arguments in macro expansion", NULLST); parlist[nargs++] = parmp; /* At start of new arg */ for (;; c = cget()) { /* Collect arg's bytes */ if (c == EOF_CHAR) { cerror("end of file within macro argument", NULLST); return (FALSE); /* Sorry. */ } else if (c == '\\') { /* Quote next character */ charput(c); /* Save the \ for later */ charput(cget()); /* Save the next char. */ continue; /* And go get another */ } else if (type[c] == QUO) { /* Start of string? */ scanstring(c, charput); /* Scan it off */ continue; /* Go get next char */ } else if (c == '(') /* Worry about balance */ paren++; /* To know about commas */ else if (c == ')') { /* Other side too */ if (paren == 0) { /* At the end? */ unget(); /* Look at it later */ break; /* Exit arg getter. */ } paren--; /* More to come. 
*/ } else if (c == ',' && paren == 0) /* Comma delimits args */ break; else if (c == '\n') /* Newline inside arg? */ wrongline = TRUE; /* We'll need a #line */ charput(c); /* Store this one */ } /* Collect an argument */ charput(EOS); /* Terminate argument */ #if DEBUG if (debug) printf("parm[%d] = \"%s\"\n", nargs, parlist[nargs - 1]); #endif } /* Collect all args. */ return (TRUE); /* Normal return */ } FILE_LOCAL expstuff(tokenp) DEFBUF *tokenp; /* Current macro being expanded */ /* * Stuff the macro body, replacing formal parameters by actual parameters. */ { register int c; /* Current character */ register char *inp; /* -> repl string */ register char *defp; /* -> macro output buff */ int size; /* Actual parm. size */ char *defend; /* -> output buff end */ int string_magic; /* String formal hack */ FILEINFO *file; /* Funny #include */ extern FILEINFO *getfile(); file = getfile(NBUFF, tokenp->name); inp = tokenp->repl; /* -> macro replacement */ defp = file->buffer; /* -> output buffer */ defend = defp + (NBUFF - 1); /* Note its end */ if (inp != NULL) { while ((c = (*inp++ & 0xFF)) != EOS) { if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) { string_magic = (c == (MAC_PARM + PAR_MAC)); if (string_magic) c = (*inp++ & 0xFF); /* * Replace formal parameter by actual parameter string. */ if ((c -= MAC_PARM) < nargs) { size = strlen(parlist[c]); if ((defp + size) >= defend) goto nospace; /* * Erase the extra set of quotes. */ if (string_magic && defp[-1] == parlist[c][0]) { strcpy(defp-1, parlist[c]); defp += (size - 2); } else { strcpy(defp, parlist[c]); defp += size; } } } else if (defp >= defend) { nospace: cfatal("Out of space in macro \"%s\" arg expansion", tokenp->name); } else { *defp++ = c; } } } *defp = EOS; #if DEBUG if (debug > 1) printf("macroline: \"%s\"\n", file->buffer); #endif } #if DEBUG dumpparm(why) char *why; /* * Dump parameter list. 
*/ { register int i; printf("dump of %d parameters (%d bytes total) %s\n", nargs, parmp - parm, why); for (i = 0; i < nargs; i++) { printf("parm[%d] (%d) = \"%s\"\n", i + 1, strlen(parlist[i]), parlist[i]); } } #endif -h- cpp5.c Mon Jan 7 23:59:34 1985 cpp5.c /* * C P P 5 . C * E x p r e s s i o n E v a l u a t i o n * * Edit History * 31-Aug-84 MM USENET net.sources release * 04-Oct-84 MM __LINE__ and __FILE__ must call ungetstring() * so they work correctly with token concatenation. * Added string formal recognition. * 25-Oct-84 MM "Short-circuit" evaluate #if's so that we * don't print unnecessary error messages for * #if !defined(FOO) && FOO != 0 && 10 / FOO ... * 31-Oct-84 ado/MM Added token concatenation * 6-Nov-84 MM Split from #define stuff, added sizeof stuff * 19-Nov-84 ado #if error returns TRUE for (sigh) compatibility */ #include #include #include "cppdef.h" #include "cpp.h" /* * Evaluate an #if expression. */ static char *opname[] = { /* For debug and error messages */ "end of expression", "val", "id", "+", "-", "*", "/", "%", "<<", ">>", "&", "|", "^", "==", "!=", "<", "<=", ">=", ">", "&&", "||", "?", ":", ",", "unary +", "unary -", "~", "!", "(", ")", "(none)", }; /* * opdope[] has the operator precedence: * Bits * 7 Unused (so the value is always positive) * 6-2 Precedence (000x .. 017x) * 1-0 Binary op. flags: * 01 The binop flag should be set/cleared when this op is seen. * 10 The new value of the binop flag. * Note: Expected, New binop * constant 0 1 Binop, end, or ) should follow constants * End of line 1 0 End may not be preceeded by an operator * binary 1 0 Binary op follows a value, value follows. * unary 0 0 Unary op doesn't follow a value, value follows * ( 0 0 Doesn't follow value, value or unop follows * ) 1 1 Follows value. Op follows. 
*/ static char opdope[OP_MAX] = { 0001, /* End of expression */ 0002, /* Digit */ 0000, /* Letter (identifier) */ 0141, 0141, 0151, 0151, 0151, /* ADD, SUB, MUL, DIV, MOD */ 0131, 0131, 0101, 0071, 0071, /* ASL, ASR, AND, OR, XOR */ 0111, 0111, 0121, 0121, 0121, 0121, /* EQ, NE, LT, LE, GE, GT */ 0061, 0051, 0041, 0041, 0031, /* ANA, ORO, QUE, COL, CMA */ /* * Unary op's follow */ 0160, 0160, 0160, 0160, /* NEG, PLU, COM, NOT */ 0170, 0013, 0023, /* LPA, RPA, END */ }; /* * OP_QUE and OP_RPA have alternate precedences: */ #define OP_RPA_PREC 0013 #define OP_QUE_PREC 0034 /* * S_ANDOR and S_QUEST signal "short-circuit" boolean evaluation, so that * #if FOO != 0 && 10 / FOO ... * doesn't generate an error message. They are stored in optab.skip. */ #define S_ANDOR 2 #define S_QUEST 1 typedef struct optab { char op; /* Operator */ char prec; /* Its precedence */ char skip; /* Short-circuit: TRUE to skip */ } OPTAB; static int evalue; /* Current value from evallex() */ #ifdef nomacargs FILE_LOCAL int isbinary(op) register int op; { return (op >= FIRST_BINOP && op <= LAST_BINOP); } FILE_LOCAL int isunary(op) register int op; { return (op >= FIRST_UNOP && op <= LAST_UNOP); } #else #define isbinary(op) (op >= FIRST_BINOP && op <= LAST_BINOP) #define isunary(op) (op >= FIRST_UNOP && op <= LAST_UNOP) #endif /* * The following definitions are used to specify basic variable sizes. 
*/ #ifndef S_CHAR #define S_CHAR (sizeof (char)) #endif #ifndef S_SINT #define S_SINT (sizeof (short int)) #endif #ifndef S_INT #define S_INT (sizeof (int)) #endif #ifndef S_LINT #define S_LINT (sizeof (long int)) #endif #ifndef S_FLOAT #define S_FLOAT (sizeof (float)) #endif #ifndef S_DOUBLE #define S_DOUBLE (sizeof (double)) #endif #ifndef S_PCHAR #define S_PCHAR (sizeof (char *)) #endif #ifndef S_PSINT #define S_PSINT (sizeof (short int *)) #endif #ifndef S_PINT #define S_PINT (sizeof (int *)) #endif #ifndef S_PLINT #define S_PLINT (sizeof (long int *)) #endif #ifndef S_PFLOAT #define S_PFLOAT (sizeof (float *)) #endif #ifndef S_PDOUBLE #define S_PDOUBLE (sizeof (double *)) #endif #ifndef S_PFPTR #define S_PFPTR (sizeof (int (*)())) #endif typedef struct types { short type; /* This is the bit if */ char *name; /* this is the token word */ } TYPES; static TYPES basic_types[] = { { T_CHAR, "char", }, { T_INT, "int", }, { T_FLOAT, "float", }, { T_DOUBLE, "double", }, { T_SHORT, "short", }, { T_LONG, "long", }, { T_SIGNED, "signed", }, { T_UNSIGNED, "unsigned", }, { 0, NULL, }, /* Signal end */ }; /* * Test_table[] is used to test for illegal combinations. */ static short test_table[] = { T_FLOAT | T_DOUBLE | T_LONG | T_SHORT, T_FLOAT | T_DOUBLE | T_CHAR | T_INT, T_FLOAT | T_DOUBLE | T_SIGNED | T_UNSIGNED, T_LONG | T_SHORT | T_CHAR, 0 /* end marker */ }; /* * The order of this table is important -- it is also referenced by * the command line processor to allow run-time overriding of the * built-in size values. 
The order must not be changed: * char, short, int, long, float, double (func pointer) */ SIZES size_table[] = { { T_CHAR, S_CHAR, S_PCHAR }, /* char */ { T_SHORT, S_SINT, S_PSINT }, /* short int */ { T_INT, S_INT, S_PINT }, /* int */ { T_LONG, S_LINT, S_PLINT }, /* long */ { T_FLOAT, S_FLOAT, S_PFLOAT }, /* float */ { T_DOUBLE, S_DOUBLE, S_PDOUBLE }, /* double */ { T_FPTR, 0, S_PFPTR }, /* int (*()) */ { 0, 0, 0 }, /* End of table */ }; int eval() /* * Evaluate an expression. Straight-forward operator precedence. * This is called from control() on encountering an #if statement. * It calls the following routines: * evallex Lexical analyser -- returns the type and value of * the next input token. * evaleval Evaluate the current operator, given the values on * the value stack. Returns a pointer to the (new) * value stack. * For compatiblity with older cpp's, this return returns 1 (TRUE) * if a syntax error is detected. */ { register int op; /* Current operator */ register int *valp; /* -> value vector */ register OPTAB *opp; /* Operator stack */ int prec; /* Op precedence */ int binop; /* Set if binary op. 
needed */ int op1; /* Operand from stack */ int skip; /* For short-circuit testing */ int value[NEXP]; /* Value stack */ OPTAB opstack[NEXP]; /* Operand stack */ extern int *evaleval(); /* Does actual evaluation */ valp = value; opp = opstack; opp->op = OP_END; /* Mark bottom of stack */ opp->prec = opdope[OP_END]; /* And its precedence */ opp->skip = 0; /* Not skipping now */ binop = 0; again: ; #ifdef DEBUG_EVAL printf("In #if at again: skip = %d, binop = %d, line is: %s", opp->skip, binop, infile->bptr); #endif if ((op = evallex(opp->skip)) == OP_SUB && binop == 0) op = OP_NEG; /* Unary minus */ else if (op == OP_ADD && binop == 0) op = OP_PLU; /* Unary plus */ else if (op == OP_FAIL) return (1); /* Error in evallex */ #ifdef DEBUG_EVAL printf("op = %s, opdope = %03o, binop = %d, skip = %d\n", opname[op], opdope[op], binop, opp->skip); #endif if (op == DIG) { /* Value? */ if (binop != 0) { cerror("misplaced constant in #if", NULLST); return (1); } else if (valp >= &value[NEXP-1]) { cerror("#if value stack overflow", NULLST); return (1); } else { #ifdef DEBUG_EVAL printf("pushing %d onto value stack[%d]\n", evalue, valp - value); #endif *valp++ = evalue; binop = 1; } goto again; } else if (op > OP_END) { cerror("Illegal #if line", NULLST); return (1); } prec = opdope[op]; if (binop != (prec & 1)) { cerror("Operator %s in incorrect context", opname[op]); return (1); } binop = (prec & 2) >> 1; for (;;) { #ifdef DEBUG_EVAL printf("op %s, prec %d., stacked op %s, prec %d, skip %d\n", opname[op], prec, opname[opp->op], opp->prec, opp->skip); #endif if (prec > opp->prec) { if (op == OP_LPA) prec = OP_RPA_PREC; else if (op == OP_QUE) prec = OP_QUE_PREC; op1 = opp->skip; /* Save skip for test */ /* * Push operator onto op. stack. */ opp++; if (opp >= &opstack[NEXP]) { cerror("expression stack overflow at op \"%s\"", opname[op]); return (1); } opp->op = op; opp->prec = prec; skip = (valp[-1] != 0); /* Short-circuit tester */ /* * Do the short-circuit stuff here. 
Short-circuiting * stops automagically when operators are evaluated. */ if ((op == OP_ANA && !skip) || (op == OP_ORO && skip)) opp->skip = S_ANDOR; /* And/or skip starts */ else if (op == OP_QUE) /* Start of ?: operator */ opp->skip = (op1 & S_ANDOR) | ((!skip) ? S_QUEST : 0); else if (op == OP_COL) { /* : inverts S_QUEST */ opp->skip = (op1 & S_ANDOR) | (((op1 & S_QUEST) != 0) ? 0 : S_QUEST); } else { /* Other ops leave */ opp->skip = op1; /* skipping unchanged. */ } #ifdef DEBUG_EVAL printf("stacking %s, valp[-1] == %d at %s", opname[op], valp[-1], infile->bptr); dumpstack(opstack, opp, value, valp); #endif goto again; } /* * Pop operator from op. stack and evaluate it. * End of stack and '(' are specials. */ skip = opp->skip; /* Remember skip value */ switch ((op1 = opp->op)) { /* Look at stacked op */ case OP_END: /* Stack end marker */ if (op == OP_EOE) return (valp[-1]); /* Finished ok. */ goto again; /* Read another op. */ case OP_LPA: /* ( on stack */ if (op != OP_RPA) { /* Matches ) on input */ cerror("unbalanced paren's, op is \"%s\"", opname[op]); return (1); } opp--; /* Unstack it */ /* goto again; -- Fall through */ case OP_QUE: goto again; /* Evaluate true expr. */ case OP_COL: /* : on stack. */ opp--; /* Unstack : */ if (opp->op != OP_QUE) { /* Matches ? on stack? */ cerror("Misplaced '?' or ':', previous operator is %s", opname[opp->op]); return (1); } /* * Evaluate op1. */ default: /* Others: */ opp--; /* Unstack the operator */ #ifdef DEBUG_EVAL printf("Stack before evaluation of %s\n", opname[op1]); dumpstack(opstack, opp, value, valp); #endif valp = evaleval(valp, op1, skip); #ifdef DEBUG_EVAL printf("Stack after evaluation\n"); dumpstack(opstack, opp, value, valp); #endif } /* op1 switch end */ } /* Stack unwind loop */ } FILE_LOCAL int evallex(skip) int skip; /* TRUE if short-circuit evaluation */ /* * Return next eval operator or value. Called from eval(). 
It * calls a special-purpose routines for 'char' strings and * numeric values: * evalchar called to evaluate 'x' * evalnum called to evaluate numbers. */ { register int c, c1, t; again: do { /* Collect the token */ c = skipws(); if ((c = macroid(c)) == EOF_CHAR || c == '\n') { unget(); return (OP_EOE); /* End of expression */ } } while ((t = type[c]) == LET && catenate()); if (t == INV) { /* Total nonsense */ if (!skip) { if (isascii(c) && isprint(c)) cierror("illegal character '%c' in #if", c); else cierror("illegal character (%d decimal) in #if", c); } return (OP_FAIL); } else if (t == QUO) { /* ' or " */ if (c == '\'') { /* Character constant */ evalue = evalchar(skip); /* Somewhat messy */ #ifdef DEBUG_EVAL printf("evalchar returns %d.\n", evalue); #endif return (DIG); /* Return a value */ } cerror("Can't use a string in an #if", NULLST); return (OP_FAIL); } else if (t == LET) { /* ID must be a macro */ if (streq(token, "defined")) { /* Or defined name */ c1 = c = skipws(); if (c == '(') /* Allow defined(name) */ c = skipws(); if (type[c] == LET) { evalue = (lookid(c) != NULL); if (c1 != '(' /* Need to balance */ || skipws() == ')') /* Did we balance? */ return (DIG); /* Parsed ok */ } cerror("Bad #if ... defined() syntax", NULLST); return (OP_FAIL); } else if (streq(token, "sizeof")) /* New sizeof hackery */ return (dosizeof()); /* Gets own routine */ /* * The Draft ANSI C Standard says that an undefined symbol * in an #if has the value zero. We are a bit pickier, * warning except where the programmer was careful to write * #if defined(foo) ? foo : 0 */ if (!skip) cwarn("undefined symbol \"%s\" in #if, 0 used", token); evalue = 0; return (DIG); } else if (t == DIG) { /* Numbers are harder */ evalue = evalnum(c); #ifdef DEBUG_EVAL printf("evalnum returns %d.\n", evalue); #endif } else if (strchr("!=<>&|\\", c) != NULL) { /* * Process a possible multi-byte lexeme. 
*/ c1 = cget(); /* Peek at next char */ switch (c) { case '!': if (c1 == '=') return (OP_NE); break; case '=': if (c1 != '=') { /* Can't say a=b in #if */ unget(); cerror("= not allowed in #if", NULLST); return (OP_FAIL); } return (OP_EQ); case '>': case '<': if (c1 == c) return ((c == '<') ? OP_ASL : OP_ASR); else if (c1 == '=') return ((c == '<') ? OP_LE : OP_GE); break; case '|': case '&': if (c1 == c) return ((c == '|') ? OP_ORO : OP_ANA); break; case '\\': if (c1 == '\n') /* Multi-line if */ goto again; cerror("Unexpected \\ in #if", NULLST); return (OP_FAIL); } unget(); } return (t); } FILE_LOCAL int dosizeof() /* * Process the sizeof (basic type) operation in an #if string. * Sets evalue to the size and returns * DIG success * OP_FAIL bad parse or something. */ { register int c; register TYPES *tp; register SIZES *sizp; register short *testp; short typecode; if ((c = skipws()) != '(') goto nogood; /* * Scan off the tokens. */ typecode = 0; while ((c = skipws())) { if ((c = macroid(c)) == EOF_CHAR || c == '\n') goto nogood; /* End of line is a bug */ else if (c == '(') { /* thing (*)() func ptr */ if (skipws() == '*' && skipws() == ')') { /* We found (*) */ if (skipws() != '(') /* Let () be optional */ unget(); else if (skipws() != ')') goto nogood; typecode |= T_FPTR; /* Function pointer */ } else { /* Junk is a bug */ goto nogood; } } else if (type[c] != LET) /* Exit if not a type */ break; else if (!catenate()) { /* Maybe combine tokens */ /* * Look for this unexpandable token in basic_types. * The code accepts "int long" as well as "long int" * which is a minor bug as bugs go (and one shared with * a lot of C compilers). */ for (tp = basic_types; tp->name != NULLST; tp++) { if (streq(token, tp->name)) break; } if (tp->name == NULLST) { cerror("#if sizeof, unknown type \"%s\"", token); return (OP_FAIL); } typecode |= tp->type; /* Or in the type bit */ } } /* * We are at the end of the type scan. Chew off '*' if necessary. 
*/ if (c == '*') { typecode |= T_PTR; c = skipws(); } if (c == ')') { /* Last syntax check */ for (testp = test_table; *testp != 0; testp++) { if (!bittest(typecode & *testp)) { cerror("#if ... sizeof: illegal type combination", NULLST); return (OP_FAIL); } } /* * We assume that all function pointers are the same size: * sizeof (int (*)()) == sizeof (float (*)()) * We assume that signed and unsigned don't change the size: * sizeof (signed int) == (sizeof unsigned int) */ if ((typecode & T_FPTR) != 0) /* Function pointer */ typecode = T_FPTR | T_PTR; else { /* Var or var * datum */ typecode &= ~(T_SIGNED | T_UNSIGNED); if ((typecode & (T_SHORT | T_LONG)) != 0) typecode &= ~T_INT; } if ((typecode & ~T_PTR) == 0) { cerror("#if sizeof() error, no type specified", NULLST); return (OP_FAIL); } /* * Exactly one bit (and possibly T_PTR) may be set. */ for (sizp = size_table; sizp->bits != 0; sizp++) { if ((typecode & ~T_PTR) == sizp->bits) { evalue = ((typecode & T_PTR) != 0) ? sizp->psize : sizp->size; return (DIG); } } /* We shouldn't fail */ cierror("#if ... sizeof: bug, unknown type code 0x%x", typecode); return (OP_FAIL); } nogood: unget(); cerror("#if ... sizeof() syntax error", NULLST); return (OP_FAIL); } FILE_LOCAL int bittest(value) /* * TRUE if value is zero or exactly one bit is set in value. */ { #if (4096 & ~(-4096)) == 0 return ((value & ~(-value)) == 0); #else /* * Do it the hard way (for non 2's complement machines) */ return (value == 0 || value ^ (value - 1) == (value * 2 - 1)); #endif } FILE_LOCAL int evalnum(c) register int c; /* * Expand number for #if lexical analysis. Note: evalnum recognizes * the unsigned suffix, but only returns a signed int value. 
*/ { register int value; register int base; register int c1; if (c != '0') base = 10; else if ((c = cget()) == 'x' || c == 'X') { base = 16; c = cget(); } else base = 8; value = 0; for (;;) { c1 = c; if (isascii(c) && isupper(c1)) c1 = tolower(c1); if (c1 >= 'a') c1 -= ('a' - 10); else c1 -= '0'; if (c1 < 0 || c1 >= base) break; value *= base; value += c1; c = cget(); } if (c == 'u' || c == 'U') /* Unsigned nonsense */ c = cget(); unget(); return (value); } FILE_LOCAL int evalchar(skip) int skip; /* TRUE if short-circuit evaluation */ /* * Get a character constant */ { register int c; register int value; register int count; instring = TRUE; if ((c = cget()) == '\\') { switch ((c = cget())) { case 'a': /* New in Standard */ #if ('a' == '\a' || '\a' == ALERT) value = ALERT; /* Use predefined value */ #else value = '\a'; /* Use compiler's value */ #endif break; case 'b': value = '\b'; break; case 'f': value = '\f'; break; case 'n': value = '\n'; break; case 'r': value = '\r'; break; case 't': value = '\t'; break; case 'v': /* New in Standard */ #if ('v' == '\v' || '\v' == VT) value = VT; /* Use predefined value */ #else value = '\v'; /* Use compiler's value */ #endif break; case 'x': /* '\xFF' */ count = 3; value = 0; while ((((c = get()) >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) && (--count >= 0)) { value *= 16; value += (c <= '9') ? (c - '0') : ((c & 0xF) + 9); } unget(); break; default: if (c >= '0' && c <= '7') { count = 3; value = 0; while (c >= '0' && c <= '7' && --count >= 0) { value *= 8; value += (c - '0'); c = get(); } unget(); } else value = c; break; } } else if (c == '\'') value = 0; else value = c; /* * We warn on multi-byte constants and try to hack * (big|little)endian machines. 
*/
#if BIG_ENDIAN
    count = 0;
#endif
    /*
     * Every further character up to the closing quote (or newline or
     * end of file) is folded into value, one BITS_CHAR-wide field
     * per character.
     */
    while ((c = get()) != '\'' && c != EOF_CHAR && c != '\n') {
        if (!skip)
            ciwarn("multi-byte constant '%c' isn't portable", c);
#if BIG_ENDIAN
        count += BITS_CHAR;
        value += (c << count);
#else
        value <<= BITS_CHAR;
        value += c;
#endif
    }
    instring = FALSE;
    return (value);
}

FILE_LOCAL int *
evaleval(valp, op, skip)
register int    *valp;
int             op;
int             skip;           /* TRUE if short-circuit evaluation     */
/*
 * Apply the argument operator to the data on the value stack.
 * One or two values are popped from the value stack and the result
 * is pushed onto the value stack.
 *
 * OP_COL is a special case: it pops a third value, the test.
 *
 * evaleval() returns the new pointer to the top of the value stack.
 */
{
    register int    v1, v2;

    /*
     * The right operand is on top of the stack; v2 is only loaded
     * (and only meaningful) for binary operators.
     */
    if (isbinary(op))
        v2 = *--valp;
    v1 = *--valp;
#ifdef DEBUG_EVAL
    printf("%s op %s", (isbinary(op)) ? "binary" : "unary",
        opname[op]);
    if (isbinary(op))
        printf(", v2 = %d.", v2);
    printf(", v1 = %d.\n", v1);
#endif
    switch (op) {
    case OP_EOE:
        break;

    case OP_ADD:
        v1 += v2;
        break;

    case OP_SUB:
        v1 -= v2;
        break;

    case OP_MUL:
        v1 *= v2;
        break;

    case OP_DIV:
    case OP_MOD:
        /*
         * A zero divisor yields zero; the warning is suppressed
         * while skipping a short-circuited subexpression.
         */
        if (v2 == 0) {
            if (!skip) {
                cwarn("%s by zero in #if, zero result assumed",
                    (op == OP_DIV) ? "divide" : "mod");
            }
            v1 = 0;
        }
        else if (op == OP_DIV)
            v1 /= v2;
        else
            v1 %= v2;
        break;

    case OP_ASL:
        v1 <<= v2;
        break;

    case OP_ASR:
        v1 >>= v2;
        break;

    case OP_AND:
        v1 &= v2;
        break;

    case OP_OR:
        v1 |= v2;
        break;

    case OP_XOR:
        v1 ^= v2;
        break;

    case OP_EQ:
        v1 = (v1 == v2);
        break;

    case OP_NE:
        v1 = (v1 != v2);
        break;

    case OP_LT:
        v1 = (v1 < v2);
        break;

    case OP_LE:
        v1 = (v1 <= v2);
        break;

    case OP_GE:
        v1 = (v1 >= v2);
        break;

    case OP_GT:
        v1 = (v1 > v2);
        break;

    case OP_ANA:
        v1 = (v1 && v2);
        break;

    case OP_ORO:
        v1 = (v1 || v2);
        break;

    case OP_COL:
        /*
         * v1 has the "true" value, v2 the "false" value.
         * The top of the value stack has the test.
         */
        v1 = (*--valp) ?
v1 : v2; break; case OP_NEG: v1 = (-v1); break; case OP_PLU: break; case OP_COM: v1 = ~v1; break; case OP_NOT: v1 = !v1; break; default: cierror("#if bug, operand = %d.", op); v1 = 0; } *valp++ = v1; return (valp); } #ifdef DEBUG_EVAL dumpstack(opstack, opp, value, valp) OPTAB opstack[NEXP]; /* Operand stack */ register OPTAB *opp; /* Operator stack */ int value[NEXP]; /* Value stack */ register int *valp; /* -> value vector */ { printf("index op prec skip name -- op stack at %s", infile->bptr); while (opp > opstack) { printf(" [%2d] %2d %03o %d %s\n", opp - opstack, opp->op, opp->prec, opp->skip, opname[opp->op]); opp--; } while (--valp >= value) { printf("value[%d] = %d\n", (valp - value), *valp); } } #endif -h- cpp6.c Mon Jan 7 23:59:34 1985 cpp6.c /* * C P P 6 . C * S u p p o r t R o u t i n e s * * Edit History * 25-May-84 MM Added 8-bit support to type table. * 30-May-84 ARF sharp() should output filename in quotes * 02-Aug-84 MM Newline and #line hacking. sharp() now in cpp1.c * 31-Aug-84 MM USENET net.sources release * 11-Sep-84 ado/MM Keepcomments, also line number pathological * 12-Sep-84 ado/MM bug if comment changes to space and we unget later. * 03-Oct-84 gkr/MM Fixed scannumber bug for '.e' (as in struct.element). * 04-Oct-84 MM Added ungetstring() for token concatenation * 08-Oct-84 MM Yet another attack on number scanning * 31-Oct-84 ado Parameterized $ in identifiers * 2-Nov-84 MM Token concatenation is messier than I thought * 6-Dec-84 MM \ is everywhere invisible. */ #include #include #include "cppdef.h" #include "cpp.h" /* * skipnl() skips over input text to the end of the line. * skipws() skips over "whitespace" (spaces or tabs), but * not skip over the end of the line. It skips over * TOK_SEP, however (though that shouldn't happen). * scanid() reads the next token (C identifier) into token[]. * The caller has already read the first character of * the identifier. Unlike macroid(), the token is * never expanded. 
* macroid()    reads the next token (C identifier) into token[].
 *              If it is a #defined macro, it is expanded and the
 *              scan is repeated.  macroid() returns the first
 *              character that does not start a #defined macro; if
 *              that character is a letter, the identifier is in token[].
 * catenate()   Does the dirty work of token concatenation, TRUE if it did.
 * scanstring() Reads a string from the input stream, calling
 *              a user-supplied function for each character.
 *              This function may be output() to write the
 *              string to the output file, or save() to save
 *              the string in the work buffer.
 * scannumber() Reads a C numeric constant from the input stream,
 *              calling the user-supplied function for each
 *              character.  (output() or save() as noted above.)
 * save()       Save one character in the work[] buffer.
 * savestring() Saves a string in malloc() memory.
 * getfile()    Initialize a new FILEINFO structure, called when
 *              #include opens a new file, or a macro is to be
 *              expanded.
 * getmem()     Get a specified number of bytes from malloc memory.
 * output()     Write one character to stdout (calling putchar) --
 *              implemented as a function so its address may be
 *              passed to scanstring() and scannumber().
 * lookid()     Scans the next token (identifier) from the input
 *              stream.  Looks for it in the #defined symbol table.
 *              Returns a pointer to the definition, if found, or NULL
 *              if not present.  The identifier is stored in token[].
 * defendel()   Define enter/delete subroutine.  Updates the
 *              symbol table.
 * get()        Read the next byte from the current input stream,
 *              handling end of (macro/file) input and embedded
 *              comments appropriately.  Note that the global
 *              instring is -- essentially -- a parameter to get().
 * cget()       Like get(), but skip over TOK_SEP.
 * unget()      Push last gotten character back on the input stream.
 * cerror(), cwarn(), cfatal(), cierror(), ciwarn()
 *              These routines format and print messages to the user.
 *              cerror & cwarn take a format and a single string argument.
 *              cierror & ciwarn take a format and a single int (char) argument.
 *              cfatal takes a format and a single string argument.
*/ /* * This table must be rewritten for a non-Ascii machine. * * Note that several "non-visible" characters have special meaning: * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion. * Hex 1E TOK_SEP -- a delimiter for token concatenation * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation */ #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D << error type table isn't correct >> #endif #if OK_DOLLAR #define DOL LET #else #define DOL 000 #endif char type[256] = { /* Character type codes Hex */ END, 000, 000, 000, 000, 000, 000, 000, /* 00 */ 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */ 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */ OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */ DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */ DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */ 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */ LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */ LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */ LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */ 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */ LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */ LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */ LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ }; skipnl() /* * Skip to the end of the current input line. 
*/
{
    register int    c;

    do {                                    /* Skip to newline      */
        c = get();
    } while (c != '\n' && c != EOF_CHAR);
}

int
skipws()
/*
 * Skip over whitespace: characters typed SPA in type[] and, when
 * COMMENT_INVISIBLE is set, COM_SEP.  Returns the first character
 * that is not skipped.
 */
{
    register int    c;

    do {                                    /* Skip whitespace      */
        c = get();
#if COMMENT_INVISIBLE
    } while (type[c] == SPA || c == COM_SEP);
#else
    } while (type[c] == SPA);
#endif
    return (c);
}

scanid(c)
register int    c;              /* First char of id             */
/*
 * Get the next token (an id) into the token buffer.
 * Characters beyond IDMAX are read but not stored.
 * Note: this code is duplicated in lookid().
 * Change one, change both.
 */
{
    register char   *bp;

    if (c == DEF_MAGIC)                     /* Eat the magic token  */
        c = get();                          /* undefiner.           */
    bp = token;
    do {
        if (bp < &token[IDMAX])             /* token dim is IDMAX+1 */
            *bp++ = c;
        c = get();
    } while (type[c] == LET || type[c] == DIG);
    unget();                                /* Rescan terminator    */
    *bp = EOS;
}

int
macroid(c)
register int    c;
/*
 * If c is a letter, scan the id.  if it's #defined, expand it and scan
 * the next character and try again.
 *
 * Else, return the character.  If type[c] is a LET, the token is in token.
 */
{
    register DEFBUF *dp;

    /*
     * The recursion counter is cleared whenever the current input
     * is a real file (fp is non-NULL) rather than a macro expansion.
     */
    if (infile != NULL && infile->fp != NULL)
        recursion = 0;
    while (type[c] == LET && (dp = lookid(c)) != NULL) {
        expand(dp);
        c = get();
    }
    return (c);
}

int
catenate()
/*
 * A token was just read (via macroid).
 * If the next character is TOK_SEP, concatenate the next token
 * return TRUE -- which should recall macroid after refreshing
 * macroid's argument.  If it is not TOK_SEP, unget() the character
 * and return FALSE.
 */
{
    register int    c;
    register char   *token1;

#if OK_CONCAT
    if (get() != TOK_SEP) {                 /* Token concatenation  */
        unget();
        return (FALSE);
    }
    else {
        token1 = savestring(token);         /* Save first token     */
        c = macroid(get());                 /* Scan next token      */
        switch(type[c]) {                   /* What was it?         */
        case LET:                           /* An identifier, ...
*/
            if (strlen(token1) + strlen(token) >= NWORK)
                cfatal("work buffer overflow doing %s #", token1);
            sprintf(work, "%s%s", token1, token);
            break;

        case DIG:                           /* A digit string       */
            /*
             * Append everything up to the next TOK_SEP to the
             * first token; save() aborts if work[] fills up.
             */
            strcpy(work, token1);
            workp = work + strlen(work);
            do {
                save(c);
            } while ((c = get()) != TOK_SEP);
            /*
             * The trailing TOK_SEP is no longer needed.
             */
            save(EOS);
            break;

        default:                            /* An error, ...        */
            if (isprint(c))
                cierror("Strange character '%c' after #", c);
            else
                cierror("Strange character (%d.) after #", c);
            strcpy(work, token1);
            unget();
            break;
        }
        /*
         * work has the concatenated token and token1 has
         * the first token (no longer needed).  Unget the
         * new (concatenated) token after freeing token1.
         * Finally, setup to read the new token.
         */
        free(token1);                       /* Free up memory       */
        ungetstring(work);                  /* Unget the new thing, */
        return (TRUE);
    }
#else
    return (FALSE);                         /* Not supported        */
#endif
}

int
scanstring(delim, outfun)
register int    delim;          /* ' or "                       */
int             (*outfun)();    /* Output function              */
/*
 * Scan off a string.  Warning if terminated by newline or EOF.
 * outfun() outputs the character -- to a buffer if in a macro.
 * TRUE if ok, FALSE if error.
 */
{
    register int    c;

    instring = TRUE;                        /* Don't strip comments */
    (*outfun)(delim);
    while ((c = get()) != delim && c != '\n' && c != EOF_CHAR) {
        (*outfun)(c);
        if (c == '\\')                      /* Copy the escaped     */
            (*outfun)(get());               /* character unexamined */
    }
    instring = FALSE;
    if (c == delim) {
        (*outfun)(c);
        return (TRUE);
    }
    else {
        cerror("Unterminated string", NULLST);
        unget();                            /* Rescan newline/EOF   */
        return (FALSE);
    }
}

scannumber(c, outfun)
register int    c;              /* First char of number         */
register int    (*outfun)();    /* Output/store func            */
/*
 * Process a number.  We know that c is from 0 to 9 or dot.
 * Algorithm from Dave Conroy's Decus C.
 */
{
    register int    radix;      /* 8, 10, or 16                 */
    int             expseen;    /* 'e' seen in floater          */
    int             signseen;   /* '+' or '-' seen              */
    int             octal89;    /* For bad octal test           */
    int             dotflag;    /* TRUE if '.'
was seen */

    expseen = FALSE;                        /* No exponent seen yet */
    signseen = TRUE;                        /* No +/- allowed yet   */
    octal89 = FALSE;                        /* No bad octal yet     */
    radix = 10;                             /* Assume decimal       */
    if ((dotflag = (c == '.')) != FALSE) {  /* . something?         */
        (*outfun)('.');                     /* Always out the dot   */
        if (type[(c = get())] != DIG) {     /* If not a float numb, */
            unget();                        /* Rescan strange char  */
            return;                         /* All done for now     */
        }
    }                                       /* End of float test    */
    else if (c == '0') {                    /* Octal or hex?        */
        (*outfun)(c);                       /* Stuff initial zero   */
        radix = 8;                          /* Assume it's octal    */
        c = get();                          /* Look for an 'x'      */
        if (c == 'x' || c == 'X') {         /* Did we get one?      */
            radix = 16;                     /* Remember new radix   */
            (*outfun)(c);                   /* Stuff the 'x'        */
            c = get();                      /* Get next character   */
        }
    }
    for (;;) {                              /* Process curr. char.  */
        /*
         * Note that this algorithm accepts "012e4" and "03.4"
         * as legitimate floating-point numbers.
         */
        if (radix != 16 && (c == 'e' || c == 'E')) {
            if (expseen)                    /* Already saw 'E'?     */
                break;                      /* Exit loop, bad nbr.  */
            expseen = TRUE;                 /* Set exponent seen    */
            signseen = FALSE;               /* We can read '+' now  */
            radix = 10;                     /* Decimal exponent     */
        }
        else if (radix != 16 && c == '.') {
            if (dotflag)                    /* Saw dot already?     */
                break;                      /* Exit loop, two dots  */
            dotflag = TRUE;                 /* Remember the dot     */
            radix = 10;                     /* Decimal fraction     */
        }
        else if (c == '+' || c == '-') {    /* 1.0e+10              */
            if (signseen)                   /* Sign in wrong place? */
                break;                      /* Exit loop, not nbr.  */
            /* signseen = TRUE; */          /* Remember we saw it   */
        }
        else {                              /* Check the digit      */
            switch (c) {
            case '8': case '9':             /* Sometimes wrong      */
                octal89 = TRUE;             /* Do check later       */
                /* Fall through: 8 and 9 are accepted like 0 .. 7  */
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                break;                      /* Always ok            */

            case 'a': case 'b': case 'c':
            case 'd': case 'e': case 'f':
            case 'A': case 'B': case 'C':
            case 'D': case 'E': case 'F':
                if (radix == 16)            /* Alpha's are ok only  */
                    break;                  /* if reading hex.
*/
                /* Otherwise fall through: a letter ends the number */
            default:                        /* At number end        */
                goto done;                  /* Break from for loop  */
            }                               /* End of switch        */
        }                                   /* End general case     */
        (*outfun)(c);                       /* Accept the character */
        signseen = TRUE;                    /* Don't read sign now  */
        c = get();                          /* Read another char    */
    }                                       /* End of scan loop     */
    /*
     * When we break out of the scan loop, c contains the first
     * character (maybe) not in the number.  If the number is an
     * integer, allow a trailing 'L' for long and/or a trailing 'U'
     * for unsigned.  If not those, push the trailing character back
     * on the input stream.  Floating point numbers accept a trailing
     * 'L' for "long double".
     */
done: if (dotflag || expseen) {             /* Floating point?      */
        if (c == 'l' || c == 'L') {
            (*outfun)(c);
            c = get();                      /* Ungotten later       */
        }
    }
    else {                                  /* Else it's an integer */
        /*
         * We know that dotflag and expseen are both zero, now:
         * dotflag signals "saw 'L'", and
         * expseen signals "saw 'U'".
         */
        for (;;) {
            switch (c) {
            case 'l':
            case 'L':
                if (dotflag)                /* Second 'L' ends it   */
                    goto nomore;
                dotflag = TRUE;
                break;

            case 'u':
            case 'U':
                if (expseen)                /* Second 'U' ends it   */
                    goto nomore;
                expseen = TRUE;
                break;

            default:
                goto nomore;
            }
            (*outfun)(c);                   /* Got 'L' or 'U'.      */
            c = get();                      /* Look at next, too.   */
        }
    }
nomore: unget();                            /* Not part of a number */
    if (octal89 && radix == 8)
        cwarn("Illegal digit in octal number", NULLST);
}

save(c)
register int    c;
/*
 * Append one character to work[] at workp.  A full buffer is a
 * fatal error.
 */
{
    if (workp >= &work[NWORK])
        cfatal("Work buffer overflow", NULLST);
    else *workp++ = c;
}

char *
savestring(text)
char            *text;
/*
 * Store a string into free memory.  Returns the copy, which was
 * obtained from getmem().
 */
{
    register char   *result;

    result = getmem(strlen(text) + 1);
    strcpy(result, text);
    return (result);
}

FILEINFO *
getfile(bufsize, name)
int             bufsize;        /* Line or define buffer size   */
char            *name;          /* File or macro name string    */
/*
 * Common FILEINFO buffer initialization for a new file or macro.
*/
{
    register FILEINFO   *file;
    register int        size;

    size = strlen(name);                    /* File/macro name      */
    file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size);
    file->parent = infile;                  /* Chain files together */
    file->fp = NULL;                        /* No file yet          */
    file->filename = savestring(name);      /* Save file/macro name */
    file->progname = NULL;                  /* No #line seen yet    */
    file->unrecur = 0;                      /* No macro fixup       */
    file->bptr = file->buffer;              /* Initialize line ptr  */
    file->buffer[0] = EOS;                  /* Force first read     */
    file->line = 0;                         /* (Not used just yet)  */
    if (infile != NULL)                     /* If #include file     */
        infile->line = line;                /* Save current line    */
    infile = file;                          /* New current file     */
    line = 1;                               /* Note first line      */
    return (file);                          /* All done.            */
}

char *
getmem(size)
int             size;
/*
 * Get a block of free memory.  Running out of memory is a fatal
 * error (reported through cfatal()).
 */
{
    register char   *result;
    extern char     *malloc();

    if ((result = malloc((unsigned) size)) == NULL)
        cfatal("Out of memory", NULLST);
    return (result);
}

/*
 *                      C P P   S y m b o l   T a b l e s
 */

/*
 * SBSIZE defines the number of hash-table slots for the symbol table.
 * It must be a power of 2.
 */
#ifndef SBSIZE
#define SBSIZE  64
#endif
#define SBMASK  (SBSIZE - 1)
#if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1)
        << error, SBSIZE must be a power of 2 >>
#endif

static DEFBUF   *symtab[SBSIZE];        /* Symbol table queue headers   */

DEFBUF *
lookid(c)
int     c;                              /* First character of token     */
/*
 * Look for the next token in the symbol table.  Returns token in "token".
 * If found, returns the table pointer;  Else returns NULL.
*/
{
    register int        nhash;
    register DEFBUF     *dp;
    register char       *np;
    int                 temp;
    int                 isrecurse;      /* For #define foo foo  */

    np = token;
    nhash = 0;
    if ((isrecurse = (c == DEF_MAGIC)))     /* If recursive macro   */
        c = get();                          /* hack, skip DEF_MAGIC */
    do {
        if (np < &token[IDMAX]) {           /* token dim is IDMAX+1 */
            *np++ = c;                      /* Store token byte     */
            nhash += c;                     /* Update hash value    */
        }
        c = get();                          /* And get another byte */
    } while (type[c] == LET || type[c] == DIG);
    unget();                                /* Rescan terminator    */
    *np = EOS;                              /* Terminate token      */
    if (isrecurse)                          /* Recursive definition */
        return (NULL);                      /* undefined just now   */
    nhash += (np - token);                  /* Fix hash value       */
    dp = symtab[nhash & SBMASK];            /* Starting bucket      */
    /*
     * temp holds the result of the last name comparison.  It is
     * preset to "no match" so that the test after the loop never
     * reads an unset value when the bucket is empty or no entry
     * has a matching hash.
     */
    temp = -1;
    while (dp != (DEFBUF *) NULL) {         /* Search symbol table  */
        /*
         * Stop at the first entry with this hash whose name
         * compares equal to or greater than the token.
         */
        if (dp->hash == nhash               /* Fast precheck        */
         && (temp = strcmp(dp->name, token)) >= 0)
            break;
        dp = dp->link;                      /* Nope, try next one   */
    }
    return ((temp == 0) ? dp : NULL);
}

DEFBUF *
defendel(name, delete)
char            *name;
int             delete;                 /* TRUE to delete a symbol */
/*
 * Enter this name in the lookup table (delete = FALSE)
 * or delete this name (delete = TRUE).
 * Returns a pointer to the define block (delete = FALSE)
 * Returns NULL if the symbol wasn't defined (delete = TRUE).
*/
{
    register DEFBUF     *dp;
    register DEFBUF     **prevp;
    register char       *np;
    int                 nhash;
    int                 temp;
    int                 size;

    /*
     * The hash is the sum of the name's characters plus its length.
     */
    for (nhash = 0, np = name; *np != EOS;)
        nhash += *np++;
    size = (np - name);
    nhash += size;
    prevp = &symtab[nhash & SBMASK];
    /*
     * Walk the bucket, keeping prevp at the link that would have to
     * be changed.  An existing entry with this name is always
     * unlinked and freed, whether deleting or (re)entering.
     */
    while ((dp = *prevp) != (DEFBUF *) NULL) {
        if (dp->hash == nhash
         && (temp = strcmp(dp->name, name)) >= 0) {
            if (temp > 0)
                dp = NULL;                  /* Not found            */
            else {
                *prevp = dp->link;          /* Found, unlink and    */
                if (dp->repl != NULL)       /* Free the replacement */
                    free(dp->repl);         /* if any, and then     */
                free((char *) dp);          /* Free the symbol      */
            }
            break;
        }
        prevp = &dp->link;
    }
    if (!delete) {
        /* Build a fresh, empty definition and link it in at prevp */
        dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size);
        dp->link = *prevp;
        *prevp = dp;
        dp->hash = nhash;
        dp->repl = NULL;
        dp->nargs = 0;
        strcpy(dp->name, name);
    }
    /*
     * NOTE(review): after a successful delete, dp still holds the
     * address of the block that was just freed; it is only usable
     * as a "was defined" indication and must not be dereferenced.
     */
    return (dp);
}

#if DEBUG

dumpdef(why)
char            *why;
/*
 * Debug aid: print every non-empty symbol table bucket.
 */
{
    register DEFBUF     *dp;
    register DEFBUF     **syp;

    printf("CPP symbol table dump %s\n", why);
    for (syp = symtab; syp < &symtab[SBSIZE]; syp++) {
        if ((dp = *syp) != (DEFBUF *) NULL) {
            printf("symtab[%d]\n", (syp - symtab));
            do {
                dumpadef((char *) NULL, dp);
            } while ((dp = dp->link) != (DEFBUF *) NULL);
        }
    }
}

dumpadef(why, dp)
char            *why;                   /* Notation                     */
register DEFBUF *dp;
/*
 * Debug aid: print one definition.  Bytes of the replacement text in
 * the range MAC_PARM .. MAC_PARM + PAR_MAC are shown as <n>, other
 * non-printing bytes as <^X> or as an octal escape.
 */
{
    register char       *cp;
    register int        c;

    printf(" \"%s\" [%d]", dp->name, dp->nargs);
    if (why != NULL)
        printf(" (%s)", why);
    if (dp->repl != NULL) {
        printf(" => ");
        for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) {
            if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC))
                printf("<%d>", c - MAC_PARM);
            else if (isprint(c) || c == '\n' || c == '\t')
                putchar(c);
            else if (c < ' ')
                printf("<^%c>", c + '@');
            else
                printf("<\\0%o>", c);
        }
    }
    else {
        printf(", no replacement.");
    }
    putchar('\n');
}
#endif

/*
 *                      G E T
 */

int
get()
/*
 * Return the next character from a macro or the current file.
 * Handle end of file from #include files.
*/
{
    register int        c;
    register FILEINFO   *file;
    register int        popped;         /* Recursion fixup      */

    popped = 0;                         /* No macros finished yet */
get_from_file:
    if ((file = infile) == NULL)
        return (EOF_CHAR);
newline:
#if 0
    printf("get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n",
        file->filename, recursion, line,
        file->bptr - file->buffer, file->buffer);
#endif
    /*
     * Read a character from the current input line or macro.
     * At EOS, either finish the current macro (freeing temp.
     * storage) or read another line from the current input file.
     * At EOF, exit the current file (#include) or, at EOF from
     * the cpp input file, return EOF_CHAR to finish processing.
     */
    if ((c = *file->bptr++ & 0xFF) == EOS) {
        /*
         * Nothing in current line or macro.  Get next line (if
         * input from a file), or do end of file/macro processing.
         * In the latter case, jump back to restart from the top.
         */
        if (file->fp == NULL) {             /* NULL if macro        */
            popped++;
            recursion -= file->unrecur;
            if (recursion < 0)
                recursion = 0;
            infile = file->parent;          /* Unwind file chain    */
        }
        else {                              /* Else get from a file */
            if ((file->bptr = fgets(file->buffer, NBUFF, file->fp))
                    != NULL) {
#if DEBUG
                if (debug > 1) {            /* Dump it to stdout    */
                    printf("\n#line %d (%s), %s",
                        line, file->filename, file->buffer);
                }
#endif
                goto newline;               /* process the line     */
            }
            else {
                fclose(file->fp);           /* Close finished file  */
                if ((infile = file->parent) != NULL) {
                    /*
                     * There is an "ungotten" newline in the current
                     * infile buffer (set there by doinclude() in
                     * cpp1.c).  Thus, we know that the mainline code
                     * is skipping over blank lines and will do a
                     * #line at its convenience.
                     */
                    wrongline = TRUE;       /* Need a #line now     */
                }
            }
        }
        /*
         * Free up space used by the (finished) file or macro and
         * restart input from the parent file/macro, if any.
         */
        free(file->filename);               /* Free name and        */
        if (file->progname != NULL)         /* if a #line was seen, */
            free(file->progname);           /* free it, too.        */
        free((char *) file);                /* Free file space      */
        if (infile == NULL)                 /* If at end of file    */
            return (EOF_CHAR);              /* Return end of file   */
        line = infile->line;                /* Reset line number    */
        goto get_from_file;                 /* Get from the top.    */
    }
    /*
     * Common processing for the new character.
     */
    if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete   */
        goto newline;                       /* from a file          */
    if (file->parent != NULL) {             /* Macro or #include    */
        /*
         * If this call finished one or more macros, charge them to
         * the parent's unrecur count; otherwise settle the count
         * accumulated so far against the recursion counter.
         */
        if (popped != 0)
            file->parent->unrecur += popped;
        else {
            recursion -= file->parent->unrecur;
            if (recursion < 0)
                recursion = 0;
            file->parent->unrecur = 0;
        }
    }
    if (c == '\n')                          /* Maintain current     */
        ++line;                             /* line counter         */
    if (instring)                           /* Strings just return  */
        return (c);                         /* the character.       */
    else if (c == '/') {                    /* Comment?             */
        instring = TRUE;                    /* So get() won't loop  */
        if ((c = get()) != '*') {           /* Next byte '*'?       */
            instring = FALSE;               /* Nope, no comment     */
            unget();                        /* Push the char. back  */
            return ('/');                   /* Return the slash     */
        }
        if (keepcomments) {                 /* If writing comments  */
            putchar('/');                   /* Write out the        */
            putchar('*');                   /*   initializer        */
        }
        for (;;) {                          /* Eat a comment        */
            c = get();
test:       if (keepcomments && c != EOF_CHAR)
                cput(c);
            switch (c) {
            case EOF_CHAR:
                cerror("EOF in comment", NULLST);
                return (EOF_CHAR);

            case '/':
                if ((c = get()) != '*')     /* Don't let comments   */
                    goto test;              /* Nest.                */
                cwarn("Nested comments", NULLST);
                                            /* Fall into * stuff    */
            case '*':
                if ((c = get()) != '/')     /* If comment doesn't   */
                    goto test;              /* end, look at next    */
                instring = FALSE;           /* End of comment,      */
                if (keepcomments) {         /* Put out the comment  */
                    cput(c);                /* terminator, too      */
                }
                /*
                 * A comment is syntactically "whitespace" --
                 * however, there are certain strange sequences
                 * such as
                 *          #define foo(x)  (something)
                 *                  foo|* comment *|(123)
                 *       these are '/' ^           ^
                 * where just returning space (or COM_SEP) will cause
                 * problems.  This can be "fixed" by overwriting the
                 * '/' in the input line buffer with ' ' (or COM_SEP)
                 * but that may mess up an error message.
* So, we peek ahead -- if the next character is
                 * "whitespace" we just get another character, if not,
                 * we modify the buffer.  All in the name of purity.
                 */
                if (*file->bptr == '\n'
                 || type[*file->bptr & 0xFF] == SPA)
                    goto newline;
#if COMMENT_INVISIBLE
                /*
                 * Return magic (old-fashioned) syntactic space.
                 */
                return ((file->bptr[-1] = COM_SEP));
#else
                return ((file->bptr[-1] = ' '));
#endif

            case '\n':                      /* we'll need a #line   */
                if (!keepcomments)
                    wrongline = TRUE;       /* later...             */
                                            /* Fall through         */
            default:                        /* Anything else is     */
                break;                      /* Just a character     */
            }                               /* End switch           */
        }                                   /* End comment loop     */
    }                                       /* End if in comment    */
    else if (!inmacro && c == '\\') {       /* If backslash, peek   */
        if ((c = get()) == '\n') {          /* for a <nl>.  If so,  */
            wrongline = TRUE;               /* drop both characters */
            goto newline;
        }
        else {                              /* Backslash anything   */
            unget();                        /* Get it later         */
            return ('\\');                  /* Return the backslash */
        }
    }
    else if (c == '\f' || c == VT)          /* Form Feed, Vertical  */
        c = ' ';                            /* Tab are whitespace   */
    return (c);                             /* Just return the char */
}

unget()
/*
 * Backup the pointer to reread the last character.  Fatal error
 * (code bug) if we backup too far.  unget() may be called,
 * without problems, at end of file.  Only one character may
 * be ungotten.  If you need to unget more, call ungetstring().
 */
{
    register FILEINFO   *file;

    if ((file = infile) == NULL)
        return;                             /* Unget after EOF      */
    if (--file->bptr < file->buffer)
        cfatal("Too much pushback", NULLST);
    if (*file->bptr == '\n')                /* Ungetting a newline? */
        --line;                             /* Unget the line number, too */
}

ungetstring(text)
char            *text;
/*
 * Push a string back on the input stream.  This is done by treating
 * the text as if it were a macro.
*/ { register FILEINFO *file; extern FILEINFO *getfile(); file = getfile(strlen(text) + 1, ""); strcpy(file->buffer, text); } int cget() /* * Get one character, absorb "funny space" after comments or * token concatenation */ { register int c; do { c = get(); #if COMMENT_INVISIBLE } while (c == TOK_SEP || c == COM_SEP); #else } while (c == TOK_SEP); #endif return (c); } /* * Error messages and other hacks. The first byte of severity * is 'S' for string arguments and 'I' for int arguments. This * is needed for portability with machines that have int's that * are shorter than char *'s. */ static domsg(severity, format, arg) char *severity; /* "Error", "Warning", "Fatal" */ char *format; /* Format for the error message */ char *arg; /* Something for the message */ /* * Print filenames, macro names, and line numbers for error messages. */ { register char *tp; register FILEINFO *file; fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]); if (*severity == 'S') fprintf(stderr, format, arg); else fprintf(stderr, format, (int) arg); putc('\n', stderr); if ((file = infile) == NULL) return; /* At end of file */ if (file->fp != NULL) { tp = file->buffer; /* Print current file */ fprintf(stderr, "%s", tp); /* name, making sure */ if (tp[strlen(tp) - 1] != '\n') /* there's a newline */ putc('\n', stderr); } while ((file = file->parent) != NULL) { /* Print #includes, too */ if (file->fp == NULL) fprintf(stderr, "from macro %s\n", file->filename); else { tp = file->buffer; fprintf(stderr, "from file %s, line %d:\n%s", (file->progname != NULL) ? file->progname : file->filename, file->line, tp); if (tp[strlen(tp) - 1] != '\n') putc('\n', stderr); } } } cerror(format, sarg) char *format; char *sarg; /* Single string argument */ /* * Print a normal error message, string argument. */ { domsg("SError", format, sarg); errors++; } cierror(format, narg) char *format; int narg; /* Single numeric argument */ /* * Print a normal error message, numeric argument. 
*/
{
    /* The leading 'I' tells domsg() that the argument is an int */
    domsg("IError", format, (char *) narg);
    errors++;
}

cfatal(format, sarg)
char            *format;
char            *sarg;          /* Single string argument       */
/*
 * A real disaster: print the message, then exit with IO_ERROR.
 * This routine does not return.
 */
{
    domsg("SFatal error", format, sarg);
    exit(IO_ERROR);
}

cwarn(format, sarg)
char            *format;
char            *sarg;          /* Single string argument       */
/*
 * A non-fatal error, string argument.  The error count is not
 * incremented.
 */
{
    domsg("SWarning", format, sarg);
}

ciwarn(format, narg)
char            *format;
int             narg;           /* Single numeric argument      */
/*
 * A non-fatal error, numeric argument.  The error count is not
 * incremented.
 */
{
    domsg("IWarning", format, (char *) narg);
}