From 3ff81670848abb29b92e78f45080ad36cc85001c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 13:09:03 -0700 Subject: Starting over with the C99 grammar for the preprocessor. This is a fresh start with a much simpler approach for the flex/bison portions of the preprocessor. This isn't functional yet, (produces no output), but can at least read all of our test cases without any parse errors. The grammar here is based on the grammar provided for the preprocessor in the C99 specification. --- glcpp-lex.l | 197 ++++----------------------- glcpp-parse.y | 401 +++++++++---------------------------------------------- tests/glcpp-test | 5 +- 3 files changed, 98 insertions(+), 505 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index ee1f6e3aeea..f1dd11ea9bd 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,14 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFINE_OBJ_OR_FUNC -%x ST_DEFINE_PARAMETER -%x ST_DEFINE_VALUE -%x ST_IF -%x ST_UNDEF -%x ST_UNDEF_END - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN [^[:space:](),]+ +PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? @@ -54,208 +47,74 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% -{HASH}if{HSPACE}* { - BEGIN ST_IF; - return IF; -} - -{HASH}elif{HSPACE}* { - BEGIN ST_IF; - return ELIF; +{HASH}define{HSPACE}+/{IDENTIFIER}"(" { + return HASH_DEFINE_FUNC; } -{DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; +{HASH}define { + return HASH_DEFINE_OBJ; } -{OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; +{HASH}undef { + return HASH_UNDEF; } -{HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; +{HASH} { + return HASH; } -"defined" { - return DEFINED; +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -[-+*/%<>&^|()~] { - return yytext[0]; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HASH}endif{HSPACE}* { - return ENDIF; +"##" { + return PASTE; } -{HASH}else{HSPACE}* { - return ELSE; -} - -{HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; - return UNDEF; -} - -{IDENTIFIER} { - BEGIN ST_UNDEF_END; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}* - -\n { - BEGIN INITIAL; -} - - /* We use the ST_DEFINE and ST_DEFVAL states so that we can - * pass a space token, (yes, a token for whitespace!), since - * the preprocessor specification requires distinguishing - * "#define foo()" from "#define foo ()". - */ -{HASH}define{HSPACE}* { - BEGIN ST_DEFINE; - return DEFINE; -} - -{IDENTIFIER} { - BEGIN ST_DEFINE_OBJ_OR_FUNC; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HSPACE}+ { - BEGIN ST_DEFINE_VALUE; - return SPACE; -} - -"(" { - BEGIN ST_DEFINE_PARAMETER; - return '('; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"," { - return ','; -} - -")" { - BEGIN ST_DEFINE_VALUE; - return ')'; -} - -{HSPACE}+ - -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -[(),] { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; +{PUNCTUATION} { + return yytext[0]; } -{HSPACE}+ - -\n { - BEGIN INITIAL; +\n { return NEWLINE; } -{IDENTIFIER} { - int parameter_index; +{OTHER} { yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_classify_token (yyextra, yylval.str, - ¶meter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - - } -} - -[(),] { - return yytext[0]; -} - -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -\n { - yyextra->need_newline = 1; + return OTHER; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 2c0fe9a6af9..ebb28ed1965 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -119,366 +119,97 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type punctuator -%type expression INTEGER -%type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO -%type argument_list -%type macro parameter_list -%type TOKEN argument_word argument_word_or_comma -%type argument argument_or_comma replacement_list pp_tokens -%left OR -%left AND -%left '|' -%left '^' -%left '&' -%left EQUAL NOT_EQUAL -%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL -%left LEFT_SHIFT RIGHT_SHIFT -%left '+' '-' -%left '*' '/' '%' -%right UNARY - -/* Hard to remove shift/reduce conflicts documented as follows: - * - * 1. '(' after FUNC_MACRO name which is correctly resolved to shift - * to form macro invocation rather than reducing directly to - * content. - * - * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to - * shift to form macro invocation rather than reducing directly to - * argument. - * - * 3. Similarly again now that we added argument_or_comma as well. - */ -%expect 3 +%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE + + /* Stale stuff just to allow code to compile. */ +%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO %% - /* We do all printing at the input level. */ input: - /* empty */ { - parser->just_printed_separator = 1; - } -| input content { - int is_token; - int skipping = 0; - - if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP) - skipping = 1; - - if ($2 && strlen ($2) && ! skipping) { - int c = $2[0]; - int is_not_separator = ((c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_')); - - if (! parser->just_printed_separator && is_not_separator) - { - printf (" "); - } - printf ("%s", $2); - - if (is_not_separator) - parser->just_printed_separator = 0; - else - parser->just_printed_separator = 1; - } - - if ($2) - talloc_free ($2); - - if (parser->need_newline) { - printf ("\n"); - parser->just_printed_separator = 1; - parser->need_newline = 0; - } - } -; - -content: - IDENTIFIER { - $$ = $1; - } -| IDENTIFIER_FINALIZED { - $$ = $1; - } -| TOKEN { - $$ = $1.value; - } -| FUNC_MACRO { - $$ = $1; - } -| directive { - $$ = talloc_strdup (parser, "\n"); - } -| punctuator { - $$ = talloc_asprintf (parser, "%c", $1); - } -| macro { - $$ = NULL; - } + /* empty */ +| input line ; -punctuator: - '(' { $$ = '('; } -| ')' { $$ = ')'; } -| ',' { $$ = ','; } - ; - -macro: - FUNC_MACRO '(' argument_list ')' { - _expand_function_macro (parser, $1, $3); - } -| OBJ_MACRO { - _expand_object_macro (parser, $1); - talloc_free ($1); - } +line: + control_line +| text_line +| HASH non_directive ; -argument_list: - /* empty */ { - $$ = _argument_list_create (parser); - } -| argument { - $$ = _argument_list_create (parser); - _argument_list_append ($$, $1); - } -| argument_list ',' argument { - _argument_list_append ($1, $3); - $$ = $1; - } -; - -argument: - argument_word { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument argument_word { - _token_list_append ($1, $2.type, $2.value); - talloc_free ($2.value); - $$ = $1; - } -| argument '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - _token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } +control_line: + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE +| HASH NEWLINE ; -argument_word: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } +identifier_list: + IDENTIFIER +| identifier_list ',' IDENTIFIER ; - /* XXX: The body of argument_or_comma is the same as the body - * of argument, but with "argument" and "argument_word" - * changed to "argument_or_comma" and - * "argument_word_or_comma". It would be nice to have less - * redundancy here, but I'm not sure how. - * - * It would also be nice to have a less ugly grammar to have - * to implement, but such is the C preprocessor. - */ -argument_or_comma: - argument_word_or_comma { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument_or_comma argument_word_or_comma { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } -| argument_or_comma '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - _token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } +text_line: + NEWLINE +| pp_tokens NEWLINE ; -argument_word_or_comma: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } -| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); } +non_directive: + pp_tokens NEWLINE ; -directive: - DEFINE IDENTIFIER NEWLINE { - token_list_t *list = _token_list_create (parser); - _define_object_macro (parser, $2, list); - } -| DEFINE IDENTIFIER SPACE replacement_list NEWLINE { - _define_object_macro (parser, $2, $4); - } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { - _define_function_macro (parser, $2, $4, $6); - } -| IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); - } -| IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro != NULL); - } -| IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro == NULL); - } -| ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); - } -| ELSE { - _glcpp_parser_skip_stack_change_if (parser, "else", 1); - } -| ENDIF { - _glcpp_parser_skip_stack_pop (parser); - } -| UNDEF IDENTIFIER { - string_list_t *macro = hash_table_find (parser->defines, $2); - if (macro) { - /* XXX: Need hash table to support a real way - * to remove an element rather than prefixing - * a new node with data of NULL like this. */ - hash_table_insert (parser->defines, NULL, $2); - talloc_free (macro); - } - talloc_free ($2); - } +replacement_list: + /* empty */ +| pp_tokens ; -expression: - INTEGER { - $$ = $1; - } -| expression OR expression { - $$ = $1 || $3; - } -| expression AND expression { - $$ = $1 && $3; - } -| expression '|' expression { - $$ = $1 | $3; - } -| expression '^' expression { - $$ = $1 ^ $3; - } -| expression '&' expression { - $$ = $1 & $3; - } -| expression NOT_EQUAL expression { - $$ = $1 != $3; - } -| expression EQUAL expression { - $$ = $1 == $3; - } -| expression GREATER_OR_EQUAL expression { - $$ = $1 >= $3; - } -| expression LESS_OR_EQUAL expression { - $$ = $1 <= $3; - } -| expression '>' expression { - $$ = $1 > $3; - } -| expression '<' expression { - $$ = $1 < $3; - } -| expression RIGHT_SHIFT expression { - $$ = $1 >> $3; - } -| expression LEFT_SHIFT expression { - $$ = $1 << $3; - } -| expression '-' expression { - $$ = $1 - $3; - } -| expression '+' expression { - $$ = $1 + $3; - } -| expression '%' expression { - $$ = $1 % $3; - } -| expression '/' expression { - $$ = $1 / $3; - } -| expression '*' expression { - $$ = $1 * $3; - } -| '!' expression %prec UNARY { - $$ = ! $2; - } -| '~' expression %prec UNARY { - $$ = ~ $2; - } -| '-' expression %prec UNARY { - $$ = - $2; - } -| '+' expression %prec UNARY { - $$ = + $2; - } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } -| '(' expression ')' { - $$ = $2; - } +pp_tokens: + preprocessing_token +| pp_tokens preprocessing_token ; -parameter_list: - /* empty */ { - $$ = _string_list_create (parser); - } -| IDENTIFIER { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - talloc_free ($1); - } -| parameter_list ',' IDENTIFIER { - _string_list_append_item ($1, $3); - talloc_free ($3); - $$ = $1; - } +preprocessing_token: + IDENTIFIER +| punctuator +| OTHER ; -replacement_list: - /* empty */ { - $$ = _token_list_create (parser); - } -| pp_tokens { - $$ = $1; - } +punctuator: + '[' +| ']' +| '(' +| ')' +| '{' +| '}' +| '.' +| '&' +| '*' +| '+' +| '-' +| '~' +| '!' +| '/' +| '%' +| LEFT_SHIFT +| RIGHT_SHIFT +| '<' +| '>' +| LESS_OR_EQUAL +| GREATER_OR_EQUAL +| EQUAL +| NOT_EQUAL +| '^' +| '|' +| AND +| OR +| ';' +| ',' +| PASTE ; -pp_tokens: - TOKEN { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| pp_tokens TOKEN { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } -; - %% string_list_t * diff --git a/tests/glcpp-test b/tests/glcpp-test index 022a2367121..868b03cce83 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,9 +1,12 @@ #!/bin/sh +set -e + +echo "Caution: These results are just verifying parse-ability, not correctness!" for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out +# diff -B -u $test.expected $test.out done -- cgit v1.2.3 From 9bb796f33ac67abdf6c0bf55a06b0d8448caa3d3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:40:47 -0700 Subject: Add xtalloc_reference. Yet another talloc wrapper that should come in handy. --- glcpp.h | 6 ++++++ xtalloc.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/glcpp.h b/glcpp.h index 503731b85b3..6171ce8b4a0 100644 --- a/glcpp.h +++ b/glcpp.h @@ -164,4 +164,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n); char * xtalloc_asprintf (const void *t, const char *fmt, ...); +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location); + +#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__) + #endif diff --git a/xtalloc.c b/xtalloc.c index e52d12ac6b2..656ac2d6cb5 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...) va_end(ap); return ret; } + +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location) +{ + void *ret; + + ret = _talloc_reference_loc (context, ptr, location); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} -- cgit v1.2.3 From 80dc60b9c3529cf438948d50b9619e8af2fad880 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:42:00 -0700 Subject: Delete some trailing whitespace. This pernicious stuff managed to sneak in on us. --- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ebb28ed1965..c53370a89ad 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -95,7 +95,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); static void _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, int condition); - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); @@ -243,7 +243,7 @@ _string_list_append_item (string_list_t *list, const char *str) node = xtalloc (list, string_node_t); node->str = xtalloc_strdup (node, str); - + node->next = NULL; if (list->head == NULL) { @@ -404,7 +404,7 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } - + void yyerror (void *scanner, const char *error) { @@ -733,7 +733,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, parser->skip_stack->type = SKIP_TO_ENDIF; } } - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser) { -- cgit v1.2.3 From 808401fd79eea9fa2c965f9f235a753c0cb0d920 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:52:43 -0700 Subject: Store parsed tokens as token list and print all text lines. Still not doing any macro expansion just yet. But it should be fairly easy from here. --- glcpp-parse.y | 227 +++++++++++++++++++++++++++++++++++++++++-------------- glcpp.h | 27 +++++-- tests/glcpp-test | 5 +- 3 files changed, 195 insertions(+), 64 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index c53370a89ad..991b8a0b856 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -77,15 +77,29 @@ _argument_list_length (argument_list_t *list); token_list_t * _argument_list_member_at (argument_list_t *list, int index); +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str); + +token_t * +_token_create_ival (void *ctx, int type, int ival); + token_list_t * _token_list_create (void *ctx); +/* Note: This function add a talloc_reference() to token. + * + * You may want to talloc_unlink any current reference if you no + * longer need it. */ void -_token_list_append (token_list_t *list, int type, const char *value); +_token_list_append (token_list_t *list, token_t *token); void _token_list_append_list (token_list_t *list, token_list_t *tail); +void +_token_list_print (token_list_t *list); + static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -107,12 +121,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { - intmax_t imaxval; int ival; char *str; - argument_list_t *argument_list; - string_list_t *string_list; - token_t token; + token_t *token; token_list_t *token_list; } @@ -121,6 +132,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE +%type punctuator +%type IDENTIFIER OTHER +%type preprocessing_token +%type pp_tokens replacement_list text_line /* Stale stuff just to allow code to compile. */ %token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO @@ -134,7 +149,11 @@ input: line: control_line -| text_line +| text_line { + _token_list_print ($1); + printf ("\n"); + talloc_free ($1); + } | HASH non_directive ; @@ -152,7 +171,7 @@ identifier_list: ; text_line: - NEWLINE + NEWLINE { $$ = NULL; } | pp_tokens NEWLINE ; @@ -161,55 +180,68 @@ non_directive: ; replacement_list: - /* empty */ + /* empty */ { $$ = NULL; } | pp_tokens ; pp_tokens: - preprocessing_token -| pp_tokens preprocessing_token + preprocessing_token { + $$ = _token_list_create (parser); + _token_list_append ($$, $1); + talloc_unlink (parser, $1); + } +| pp_tokens preprocessing_token { + $$ = $1; + _token_list_append ($$, $2); + talloc_unlink (parser, $2); + } ; preprocessing_token: - IDENTIFIER -| punctuator -| OTHER + IDENTIFIER { + $$ = _token_create_str (parser, IDENTIFIER, $1); + } +| punctuator { + $$ = _token_create_ival (parser, $1, $1); + } +| OTHER { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: - '[' -| ']' -| '(' -| ')' -| '{' -| '}' -| '.' -| '&' -| '*' -| '+' -| '-' -| '~' -| '!' -| '/' -| '%' -| LEFT_SHIFT -| RIGHT_SHIFT -| '<' -| '>' -| LESS_OR_EQUAL -| GREATER_OR_EQUAL -| EQUAL -| NOT_EQUAL -| '^' -| '|' -| AND -| OR -| ';' -| ',' -| PASTE + '[' { $$ = '['; } +| ']' { $$ = ']'; } +| '(' { $$ = '('; } +| ')' { $$ = ')'; } +| '{' { $$ = '{'; } +| '}' { $$ = '}'; } +| '.' { $$ = '.'; } +| '&' { $$ = '&'; } +| '*' { $$ = '*'; } +| '+' { $$ = '+'; } +| '-' { $$ = '-'; } +| '~' { $$ = '~'; } +| '!' { $$ = '!'; } +| '/' { $$ = '/'; } +| '%' { $$ = '%'; } +| LEFT_SHIFT { $$ = LEFT_SHIFT; } +| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } +| '<' { $$ = '<'; } +| '>' { $$ = '>'; } +| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } +| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } +| EQUAL { $$ = EQUAL; } +| NOT_EQUAL { $$ = NOT_EQUAL; } +| '^' { $$ = '^'; } +| '|' { $$ = '|'; } +| AND { $$ = AND; } +| OR { $$ = OR; } +| ';' { $$ = ';'; } +| ',' { $$ = ','; } +| PASTE { $$ = PASTE; } ; - %% string_list_t * @@ -361,6 +393,77 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.str = talloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + token_list_t * _token_list_create (void *ctx) { @@ -374,13 +477,12 @@ _token_list_create (void *ctx) } void -_token_list_append (token_list_t *list, int type, const char *value) +_token_list_append (token_list_t *list, token_t *token) { token_node_t *node; node = xtalloc (list, token_node_t); - node->type = type; - node->value = xtalloc_strdup (list, value); + node->token = xtalloc_reference (list, token); node->next = NULL; @@ -405,6 +507,21 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } +void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _token_print (node->token); + if (node->next) + printf (" "); + } +} + void yyerror (void *scanner, const char *error) { @@ -598,7 +715,8 @@ _expand_function_macro (glcpp_parser_t *parser, expanded = _token_list_create (macro); for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, i->value, + if (_string_list_contains (macro->parameters, + i->token->value.str, ¶meter_index)) { token_list_t *argument; @@ -606,11 +724,10 @@ _expand_function_macro (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->type, - j->value); + _token_list_append (expanded, j->token); } } else { - _token_list_append (expanded, i->type, i->value); + _token_list_append (expanded, i->token); } } @@ -644,10 +761,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - token = replacements->value; + token = replacements->token->value.str; /* Implement token pasting. */ - if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { token_node_t *next_node; next_node = replacements->next->next; @@ -658,7 +775,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) } token = xtalloc_asprintf (parser, "%s%s", - token, next_node->value); + token, next_node->token->value.str); expansion->replacements = next_node->next; } @@ -671,7 +788,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ - if (replacements->type == IDENTIFIER_FINALIZED) + if (replacements->token->type == IDENTIFIER_FINALIZED) return IDENTIFIER_FINALIZED; switch (glcpp_parser_classify_token (parser, yylval.str, diff --git a/glcpp.h b/glcpp.h index 6171ce8b4a0..261254a17c4 100644 --- a/glcpp.h +++ b/glcpp.h @@ -44,21 +44,34 @@ typedef struct string_list { string_node_t *tail; } string_list_t; -typedef struct token { +typedef struct token token_t; +typedef struct token_list token_list_t; + +typedef union YYSTYPE +{ + int ival; + char *str; + token_t *token; + token_list_t *token_list; +} YYSTYPE; + +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 + +struct token { int type; - char *value; -} token_t; + YYSTYPE value; +}; typedef struct token_node { - int type; - const char *value; + token_t *token; struct token_node *next; } token_node_t; -typedef struct token_list { +struct token_list { token_node_t *head; token_node_t *tail; -} token_list_t; +}; typedef struct argument_node { token_list_t *argument; diff --git a/tests/glcpp-test b/tests/glcpp-test index 868b03cce83..34cca883301 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -7,6 +7,7 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected -# diff -B -u $test.expected $test.out +# grep -v '^#' < $test.gcc > $test.expected + grep -v '^[ ]*#' < $test > $test.expected + diff -w -u $test.expected $test.out done -- cgit v1.2.3 From 9fb8b7a495c9dc6f9a62cf82300fae5925af92fc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:04:32 -0700 Subject: Make the lexer pass whitespace through (as OTHER tokens) for text lines. With this change, we can recreate the original text-line input exactly. Previously we were inserting a space between every pair of tokens so our output had a lot more whitespace than our input. With this change, we can drop the "-b" option to diff and match the input exactly. --- glcpp-lex.l | 72 +++++++++++++++++++++++++++++++++++++++++++++----------- glcpp-parse.y | 2 -- tests/glcpp-test | 2 +- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f1dd11ea9bd..7b5cdd57a0f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,21 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" + /* This lexer has two states: + * + * The CONTROL state is for control lines (directives) + * It lexes exactly as specified in the C99 specification. + * + * The INITIAL state is for input lines. In this state, we + * make the OTHER token much more broad in that it now + * includes tokens consisting entirely of whitespace. This + * allows us to pass text through verbatim. It avoids the + * "inadvertent token pasting" problem that would occur if we + * just printed tokens, while also avoiding excess whitespace + * insertion in the output.*/ + +%x CONTROL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { + BEGIN CONTROL; return HASH_DEFINE_FUNC; } {HASH}define { + BEGIN CONTROL; return HASH_DEFINE_OBJ; } {HASH}undef { + BEGIN CONTROL; return HASH_UNDEF; } {HASH} { + BEGIN CONTROL; return HASH; } -{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -"##" { +"##" { return PASTE; } -{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -\n { +{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ + +\n { + BEGIN INITIAL; return NEWLINE; } -{OTHER} { +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +{OTHER}+ { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } -{HSPACE}+ +\n { + return NEWLINE; +} + +. { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 991b8a0b856..957421b864e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -517,8 +517,6 @@ _token_list_print (token_list_t *list) for (node = list->head; node; node = node->next) { _token_print (node->token); - if (node->next) - printf (" "); } } diff --git a/tests/glcpp-test b/tests/glcpp-test index 34cca883301..8074e471197 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -9,5 +9,5 @@ for test in *.c; do gcc -E $test -o $test.gcc # grep -v '^#' < $test.gcc > $test.expected grep -v '^[ ]*#' < $test > $test.expected - diff -w -u $test.expected $test.out + diff -u $test.expected $test.out done -- cgit v1.2.3 From ae6517f4a83981ae363bbbfe439ec23e8deb04b1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:24:59 -0700 Subject: Implement expansion of object-like macros. For this we add an "active" string_list_t to the parser. This makes the current expansion_list_t in the parser obsolete, but we don't remove that yet. With this change we can now start passing some actual tests, so we turn on real testing in the test suite again. I expect to implement things more or less in the same order as before, so the test suite now halts on first error. With this change the first 8 tests in the suite pass, (object-like macros with chaining and recursion). --- glcpp-parse.y | 128 +++++++++++++++++++++++++++++++++++++++++++++++-------- glcpp.h | 1 + tests/glcpp-test | 5 +-- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 957421b864e..b3ef177a6da 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -59,6 +59,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -98,7 +104,8 @@ void _token_list_append_list (token_list_t *list, token_list_t *tail); void -_token_list_print (token_list_t *list); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -144,21 +151,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line +| input line { + printf ("\n"); + } ; line: control_line | text_line { - _token_list_print ($1); - printf ("\n"); + _glcpp_parser_print_expanded_token_list (parser, $1); talloc_free ($1); } | HASH non_directive ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + _define_object_macro (parser, $2, $3); + } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE | HASH_UNDEF IDENTIFIER NEWLINE @@ -287,6 +297,42 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + list->head = node; +} + +void +_string_list_pop (string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -507,19 +553,6 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } -void -_token_list_print (token_list_t *list) -{ - token_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - _token_print (node->token); - } -} - void yyerror (void *scanner, const char *error) { @@ -536,6 +569,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->active = _string_list_create (parser); parser->expansions = NULL; parser->just_printed_separator = 1; @@ -605,6 +639,64 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } +void +_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, + token_t *token) +{ + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + _token_print (token); + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just print directly. */ + if (macro == NULL) { + printf ("%s", identifier); + return; + } + + /* We're not (yet) supporting function-like macros. */ + if (macro->is_function) { + printf ("%s", identifier); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); +} + +void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _glcpp_parser_print_expanded_token (parser, node->token); + if (node->next) + printf (" "); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, diff --git a/glcpp.h b/glcpp.h index 261254a17c4..bd599d73011 100644 --- a/glcpp.h +++ b/glcpp.h @@ -124,6 +124,7 @@ typedef struct skip_node { struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; + string_list_t *active; expansion_node_t *expansions; int just_printed_separator; int need_newline; diff --git a/tests/glcpp-test b/tests/glcpp-test index 8074e471197..63041552104 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,13 +1,10 @@ #!/bin/sh set -e -echo "Caution: These results are just verifying parse-ability, not correctness!" - for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc -# grep -v '^#' < $test.gcc > $test.expected - grep -v '^[ ]*#' < $test > $test.expected + grep -v '^#' < $test.gcc > $test.expected diff -u $test.expected $test.out done -- cgit v1.2.3 From e6fb7827c96451d4a09dfda31979a6b9cb27301e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:28:58 -0700 Subject: Implement #undef. Which is as simple as copying the former action back from the git history. Now all tests through test 11 pass. --- glcpp-parse.y | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b3ef177a6da..830a6232d80 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -171,7 +171,17 @@ control_line: } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE -| HASH_UNDEF IDENTIFIER NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + if (macro) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (macro); + } + talloc_free ($2); + } | HASH NEWLINE ; -- cgit v1.2.3 From b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:28:26 -0700 Subject: Implement simplified substitution for function-like macro invocation. This supports function-like macro invocation but without any argument substitution. This now makes test 11 through 14 pass. --- glcpp-lex.l | 14 ++++- glcpp-parse.y | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++------- glcpp.h | 1 + 3 files changed, 168 insertions(+), 21 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 7b5cdd57a0f..b1980742d39 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -144,6 +144,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return IDENTIFIER; } +"(" { + return '('; +} + +")" { + return ')'; +} + +"," { + return ','; +} + {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; @@ -151,7 +163,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? {HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; + return SPACE; } \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 830a6232d80..60b414e43a7 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -127,20 +127,14 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} -%union { - int ival; - char *str; - token_t *token; - token_list_t *token_list; -} - %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE %type punctuator -%type IDENTIFIER OTHER +%type IDENTIFIER OTHER SPACE +%type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -169,8 +163,12 @@ control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } -| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE -| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, NULL, $5); + } +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, $4, $6); + } | HASH_UNDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -186,8 +184,16 @@ control_line: ; identifier_list: - IDENTIFIER -| identifier_list ',' IDENTIFIER + IDENTIFIER { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + talloc_steal ($$, $1); + } +| identifier_list ',' IDENTIFIER { + $$ = $1; + _string_list_append_item ($$, $3); + talloc_steal ($$, $3); + } ; text_line: @@ -227,6 +233,9 @@ preprocessing_token: | OTHER { $$ = _token_create_str (parser, OTHER, $1); } +| SPACE { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: @@ -649,7 +658,14 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } -void +/* Print a non-macro token, or the expansion of an object-like macro. + * + * Returns 0 if this token is completely printed. + * + * Returns 1 in the case that 'token' is a function-like macro that + * needs further expansion. + */ +static int _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, token_t *token) { @@ -659,7 +675,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { _token_print (token); - return; + return 0; } /* Look up this identifier in the hash table. */ @@ -669,20 +685,135 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* Not a macro, so just print directly. */ if (macro == NULL) { printf ("%s", identifier); - return; + return 0; } - /* We're not (yet) supporting function-like macros. */ + /* For function-like macros return 1 for further processing. */ if (macro->is_function) { - printf ("%s", identifier); - return; + return 1; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return 0; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); + + return 0; +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. + * + * Return values: + * + * FUNCTION_STATUS_SUCCESS: + * + * Successfully parsed a set of function arguments. + * + * FUNCTION_NOT_A_FUNCTION: + * + * Macro name not followed by a '('. This is not an error, but + * simply that the macro name should be treated as a non-macro. + * + * FUNCTION_UNBLANCED_PARENTHESES + * + * Macro name is not followed by a balanced set of parentheses. + */ +static function_status_t +_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +{ + token_node_t *node = *node_ret, *last; + int paren_count; + int arg_count; + + last = node; + node = node->next; + + /* Ignore whitespace before first parenthesis. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != '(') + return FUNCTION_NOT_A_FUNCTION; + + paren_count = 0; + arg_count = 0; + do { + if (node->token->type == '(') + { + paren_count++; + } + else if (node->token->type == ')') + { + paren_count--; + } + else if (node->token->type == ',' && + paren_count == 1) + { + arg_count++; + } + + last = node; + node = node->next; + + } while (node && paren_count); + + if (node && paren_count) + return FUNCTION_UNBALANCED_PARENTHESES; + + *node_ret = last; + + return FUNCTION_STATUS_SUCCESS; +} + +/* Prints the expansion of *node (consuming further tokens from the + * list as necessary). Upon return *node will be the last consumed + * node, such that further processing can continue with node->next. */ +static void +_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, + token_node_t **node_ret) +{ + macro_t *macro; + token_node_t *node; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + + node = *node_ret; + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + status = _find_arguments (node_ret, &arguments); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: printf ("%s", identifier); return; + case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); } _string_list_push (parser->active, identifier); @@ -696,12 +827,15 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node; + function_status_t function_status; if (list == NULL) return; for (node = list->head; node; node = node->next) { - _glcpp_parser_print_expanded_token (parser, node->token); + if (_glcpp_parser_print_expanded_token (parser, node->token)) + _glcpp_parser_print_expanded_function (parser, &node); + if (node->next) printf (" "); } diff --git a/glcpp.h b/glcpp.h index bd599d73011..043098b1347 100644 --- a/glcpp.h +++ b/glcpp.h @@ -51,6 +51,7 @@ typedef union YYSTYPE { int ival; char *str; + string_list_t *string_list; token_t *token; token_list_t *token_list; } YYSTYPE; -- cgit v1.2.3 From f34a0009dd07dbca4de5491744bd3618eae9458e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:59:02 -0700 Subject: Pass through literal space values from replacement lists. This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to only emit SPACE tokens when on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way. --- glcpp-lex.l | 85 +++++++++++++---------------------------------------------- glcpp-parse.y | 10 +++---- glcpp.h | 1 + 3 files changed, 25 insertions(+), 71 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index b1980742d39..f6d0c8b7d67 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" - /* This lexer has two states: - * - * The CONTROL state is for control lines (directives) - * It lexes exactly as specified in the C99 specification. - * - * The INITIAL state is for input lines. In this state, we - * make the OTHER token much more broad in that it now - * includes tokens consisting entirely of whitespace. This - * allows us to pass text through verbatim. It avoids the - * "inadvertent token pasting" problem that would occur if we - * just printed tokens, while also avoiding excess whitespace - * insertion in the output.*/ - -%x CONTROL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -63,116 +48,84 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; } {HASH}define { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_OBJ; } {HASH}undef { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_UNDEF; } {HASH} { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH; } -{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -"##" { +"##" { return PASTE; } -{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -{OTHER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"(" { - return '('; -} - -")" { - return ')'; -} - -"," { - return ','; -} - {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } {HSPACE}+ { - yylval.str = xtalloc_strdup (yyextra, yytext); - return SPACE; + if (yyextra->space_tokens) { + yylval.str = xtalloc_strdup (yyextra, yytext); + return SPACE; + } } \n { return NEWLINE; } -. { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 60b414e43a7..a1981995fd0 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -160,7 +160,7 @@ line: ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { @@ -212,6 +212,7 @@ replacement_list: pp_tokens: preprocessing_token { + parser->space_tokens = 1; $$ = _token_list_create (parser); _token_list_append ($$, $1); talloc_unlink (parser, $1); @@ -234,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, OTHER, $1); + $$ = _token_create_str (parser, SPACE, $1); } ; @@ -494,6 +495,7 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: + case SPACE: printf ("%s", token->value.str); break; case LEFT_SHIFT: @@ -589,6 +591,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->space_tokens = 1; parser->expansions = NULL; parser->just_printed_separator = 1; @@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); - - if (node->next) - printf (" "); } } diff --git a/glcpp.h b/glcpp.h index 043098b1347..f3760fa7a41 100644 --- a/glcpp.h +++ b/glcpp.h @@ -126,6 +126,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int space_tokens; expansion_node_t *expansions; int just_printed_separator; int need_newline; -- cgit v1.2.3 From f8ec4e0be86eee05f5a661a01864247fcd1a6b30 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:06:17 -0700 Subject: Add a test #0 to ensure that we don't do any inadvertent token pasting. This simply ensures that spaces in input line are preserved. --- tests/000-content-with-spaces.c | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/000-content-with-spaces.c diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c new file mode 100644 index 00000000000..a7fc918c908 --- /dev/null +++ b/tests/000-content-with-spaces.c @@ -0,0 +1 @@ +this is four tokens -- cgit v1.2.3 From e9397867ddce20a4263949f4b3a488fa99af3041 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:08:07 -0700 Subject: Collapse multiple spaces in input down to a single space. This is what gcc does, and it's actually less work to do this. Previously we were having to save the contents of space tokens as a string, but we don't need to do that now. We extend test #0 to exercise this feature here. --- glcpp-lex.l | 1 - glcpp-parse.y | 10 ++++++---- tests/000-content-with-spaces.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f6d0c8b7d67..516f42dee32 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -119,7 +119,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? {HSPACE}+ { if (yyextra->space_tokens) { - yylval.str = xtalloc_strdup (yyextra, yytext); return SPACE; } } diff --git a/glcpp-parse.y b/glcpp-parse.y index a1981995fd0..0460f71f746 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,8 +132,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type punctuator -%type IDENTIFIER OTHER SPACE +%type punctuator SPACE +%type IDENTIFIER OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -235,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, SPACE, $1); + $$ = _token_create_ival (parser, SPACE, SPACE); } ; @@ -495,9 +495,11 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: - case SPACE: printf ("%s", token->value.str); break; + case SPACE: + printf (" "); + break; case LEFT_SHIFT: printf ("<<"); break; diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c index a7fc918c908..696cb3a74fc 100644 --- a/tests/000-content-with-spaces.c +++ b/tests/000-content-with-spaces.c @@ -1 +1 @@ -this is four tokens +this is four tokens -- cgit v1.2.3 From 9ce18cf9837bee379dfd0f52a3df005c1797e544 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:32:21 -0700 Subject: Implement substitution of function parameters in macro calls. This makes tests 16 - 19 pass. --- glcpp-parse.y | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0460f71f746..eb93bad85d1 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -405,9 +405,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument) { argument_node_t *node; - if (argument == NULL || argument->head == NULL) - return; - node = xtalloc (list, argument_node_t); node->argument = argument; @@ -741,8 +738,9 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) { + token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; int arg_count; @@ -757,6 +755,8 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + argument = NULL; + paren_count = 0; arg_count = 0; do { @@ -771,7 +771,14 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) else if (node->token->type == ',' && paren_count == 1) { - arg_count++; + argument = NULL; + } + else { + if (argument == NULL) { + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); + } + _token_list_append (argument, node->token); } last = node; @@ -799,6 +806,9 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, const char *identifier; argument_list_t *arguments; function_status_t status; + token_list_t *expanded; + token_node_t *i, *j; + int parameter_index; node = *node_ret; identifier = node->token->value.str; @@ -807,7 +817,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, assert (macro->is_function); - status = _find_arguments (node_ret, &arguments); + arguments = _argument_list_create (parser); + status = _arguments_parse (arguments, node_ret); switch (status) { case FUNCTION_STATUS_SUCCESS: @@ -821,10 +832,48 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, exit (1); } + if (macro->replacements == NULL) { + talloc_free (arguments); + return; + } + + + if (_argument_list_length (arguments) != + _string_list_length (macro->parameters)) + { + fprintf (stderr, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _argument_list_length (arguments), + _string_list_length (macro->parameters)); + return; + } + + expanded = _token_list_create (arguments); + + for (i = macro->replacements->head; i; i = i->next) { + if (i->token->type == IDENTIFIER && + _string_list_contains (macro->parameters, + i->token->value.str, + ¶meter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + for (j = argument->head; j; j = j->next) + { + _token_list_append (expanded, j->token); + } + } else { + _token_list_append (expanded, i->token); + } + } + _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_print_expanded_token_list (parser, expanded); _string_list_pop (parser->active); + + talloc_free (arguments); } void -- cgit v1.2.3 From c7581c2e6e6897eddc55c537c92417b813a8b81e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:41:07 -0700 Subject: Ignore separating whitespace at the beginning of a macro argument. This causes test 16 to pass. Tests 17-20 are also passing now, (though they would probably have passed before this change and simply weren't being run yet). --- glcpp-parse.y | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index eb93bad85d1..ec966580fc4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -743,7 +743,6 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; - int arg_count; last = node; node = node->next; @@ -757,9 +756,7 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) argument = NULL; - paren_count = 0; - arg_count = 0; - do { + for (paren_count = 0; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -767,6 +764,11 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; + if (paren_count == 0) { + last = node; + node = node->next; + break; + } } else if (node->token->type == ',' && paren_count == 1) @@ -775,16 +777,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } else { if (argument == NULL) { + /* Don't treat initial whitespace as + * part of the arguement. */ + if (node->token->type == SPACE) + continue; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } - - last = node; - node = node->next; - - } while (node && paren_count); + } if (node && paren_count) return FUNCTION_UNBALANCED_PARENTHESES; -- cgit v1.2.3 From 652fa272ea4bdb9bfe6cd7f8413b3a3b03972987 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:45:22 -0700 Subject: Avoid swallowing initial left parenthesis from nested macro invocation. We weren't including this left parenthesis in the argument's token list so the nested function invocation wasn not being recognized. With this fix, tests 21 and 22 now pass. --- glcpp-parse.y | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec966580fc4..131102fab95 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -754,9 +754,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + last = node; + node = node->next; + argument = NULL; - for (paren_count = 0; node; last = node, node = node->next) { + for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -770,7 +773,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) break; } } - else if (node->token->type == ',' && + + if (node->token->type == ',' && paren_count == 1) { argument = NULL; -- cgit v1.2.3 From 5aa7ea08093f727761d424ad090f44b116c8f0bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 18:39:43 -0700 Subject: Remove a bunch of old code and give the static treatment to what's left. We're no longer using the expansion stack, so its functions can go along with most of the body of glcpp_parser_lex that was using it. --- glcpp-parse.y | 262 +++++----------------------------------------------------- glcpp.h | 7 -- 2 files changed, 21 insertions(+), 248 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 131102fab95..02286cd8e09 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,88 +28,77 @@ #include "glcpp.h" -void +static void yyerror (void *scanner, const char *error); -void +static void _define_object_macro (glcpp_parser_t *parser, const char *macro, token_list_t *replacements); -void +static void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, token_list_t *replacements); -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier); - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments); - -string_list_t * +static string_list_t * _string_list_create (void *ctx); -void +static void _string_list_append_item (string_list_t *list, const char *str); -void +static void _string_list_append_list (string_list_t *list, string_list_t *tail); -void +static void _string_list_push (string_list_t *list, const char *str); -void +static void _string_list_pop (string_list_t *list); -int +static int _string_list_contains (string_list_t *list, const char *member, int *index); -int +static int _string_list_length (string_list_t *list); -argument_list_t * +static argument_list_t * _argument_list_create (void *ctx); -void +static void _argument_list_append (argument_list_t *list, token_list_t *argument); -int +static int _argument_list_length (argument_list_t *list); -token_list_t * +static token_list_t * _argument_list_member_at (argument_list_t *list, int index); /* Note: This function talloc_steal()s the str pointer. */ -token_t * +static token_t * _token_create_str (void *ctx, int type, char *str); -token_t * +static token_t * _token_create_ival (void *ctx, int type, int ival); -token_list_t * +static token_list_t * _token_list_create (void *ctx); /* Note: This function add a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -void +static void _token_list_append (token_list_t *list, token_t *token); -void +static void _token_list_append_list (token_list_t *list, token_list_t *tail); -void +static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -591,10 +580,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; - parser->expansions = NULL; - - parser->just_printed_separator = 1; - parser->need_newline = 0; parser->skip_stack = NULL; @@ -610,8 +595,6 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - if (parser->need_newline) - printf ("\n"); if (parser->skip_stack) fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); @@ -619,47 +602,6 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -static int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -token_class_t -glcpp_parser_classify_token (glcpp_parser_t *parser, - const char *identifier, - int *parameter_index) -{ - macro_t *macro; - - /* Is this token a defined macro? */ - macro = hash_table_find (parser->defines, identifier); - - if (macro == NULL) - return TOKEN_CLASS_IDENTIFIER; - - /* Don't consider this a macro if we are already actively - * expanding this macro. */ - if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER_FINALIZED; - - /* Definitely a macro. Just need to check if it's function-like. */ - if (macro->is_function) - return TOKEN_CLASS_FUNC_MACRO; - else - return TOKEN_CLASS_OBJ_MACRO; -} - /* Print a non-macro token, or the expansion of an object-like macro. * * Returns 0 if this token is completely printed. @@ -933,172 +875,10 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_glcpp_parser_push_expansion (glcpp_parser_t *parser, - macro_t *macro, - token_node_t *replacements) -{ - expansion_node_t *node; - - node = xtalloc (parser, expansion_node_t); - - node->macro = macro; - node->replacements = replacements; - - node->next = parser->expansions; - parser->expansions = node; -} - -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser) -{ - expansion_node_t *node; - - node = parser->expansions; - - if (node == NULL) { - fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); - exit (1); - } - - parser->expansions = node->next; - - talloc_free (node); -} - -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier) -{ - macro_t *macro; - - macro = hash_table_find (parser->defines, identifier); - assert (! macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); -} - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments) -{ - macro_t *macro; - token_list_t *expanded; - token_node_t *i, *j; - int parameter_index; - - macro = hash_table_find (parser->defines, identifier); - assert (macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) - { - fprintf (stderr, - "Error: macro %s invoked with %d arguments (expected %d)\n", - identifier, - _argument_list_length (arguments), - _string_list_length (macro->parameters)); - return; - } - - expanded = _token_list_create (macro); - - for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, - i->token->value.str, - ¶meter_index)) - { - token_list_t *argument; - argument = _argument_list_member_at (arguments, - parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (expanded, j->token); - } - } else { - _token_list_append (expanded, i->token); - } - } - - _glcpp_parser_push_expansion (parser, macro, expanded->head); -} - static int glcpp_parser_lex (glcpp_parser_t *parser) { - expansion_node_t *expansion; - token_node_t *replacements; - int parameter_index; - const char *token; - token_class_t class; - - /* Who says C can't do efficient tail recursion? */ - RECURSE: - - expansion = parser->expansions; - - if (expansion == NULL) - return glcpp_lex (parser->scanner); - - replacements = expansion->replacements; - - /* Pop expansion when replacements is exhausted. */ - if (replacements == NULL) { - glcpp_parser_pop_expansion (parser); - goto RECURSE; - } - - expansion->replacements = replacements->next; - - token = replacements->token->value.str; - - /* Implement token pasting. */ - if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { - token_node_t *next_node; - - next_node = replacements->next->next; - - if (next_node == NULL) { - fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); - exit (1); - } - - token = xtalloc_asprintf (parser, "%s%s", - token, next_node->token->value.str); - expansion->replacements = next_node->next; - } - - - if (strcmp (token, "(") == 0) - return '('; - else if (strcmp (token, ")") == 0) - return ')'; - - yylval.str = xtalloc_strdup (parser, token); - - /* Carefully refuse to expand any finalized identifier. */ - if (replacements->token->type == IDENTIFIER_FINALIZED) - return IDENTIFIER_FINALIZED; - - switch (glcpp_parser_classify_token (parser, yylval.str, - ¶meter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - default: - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - } + return glcpp_lex (parser->scanner); } static void diff --git a/glcpp.h b/glcpp.h index f3760fa7a41..6bd6e66a7cc 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,16 +127,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; - expansion_node_t *expansions; - int just_printed_separator; - int need_newline; skip_node_t *skip_stack; }; -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - glcpp_parser_t * glcpp_parser_create (void); -- cgit v1.2.3 From 10ae438399f14367dd9e03032594c1e16c428999 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 20:35:01 -0700 Subject: Avoid getting extra trailing whitespace from macros. This trailing whitespace was coming from macro definitions and from macro arguments. We fix this with a little extra state in the token_list. It now remembers the last non-space token added, so that these can be trimmed off just before printing the list. With this fix test 23 now passes. Tests 24 and 25 are also passing, but they probbably would ahve before this fix---just that they weren't being run earlier. --- glcpp-parse.y | 30 ++++++++++++++++++++++++++++-- glcpp.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 02286cd8e09..60eaf215b8b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -471,7 +471,7 @@ _token_create_ival (void *ctx, int type, int ival) } void -_token_print (token_t *token) +_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) { if (token->type < 256) { printf ("%c", token->type); @@ -527,6 +527,7 @@ _token_list_create (void *ctx) list = xtalloc (ctx, token_list_t); list->head = NULL; list->tail = NULL; + list->non_space_tail = NULL; return list; } @@ -548,6 +549,8 @@ _token_list_append (token_list_t *list, token_t *token) } list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; } void @@ -560,6 +563,25 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) } list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + talloc_free (tail); + tail = next; + } + } } void @@ -618,7 +640,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_print (token); + _glcpp_parser_print_token (parser, token); return 0; } @@ -719,6 +741,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { + if (argument) + _token_list_trim_trailing_space (argument); argument = NULL; } else { @@ -834,6 +858,8 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, if (list == NULL) return; + _token_list_trim_trailing_space (list); + for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); diff --git a/glcpp.h b/glcpp.h index 6bd6e66a7cc..21db918cdce 100644 --- a/glcpp.h +++ b/glcpp.h @@ -72,6 +72,7 @@ typedef struct token_node { struct token_list { token_node_t *head; token_node_t *tail; + token_node_t *non_space_tail; }; typedef struct argument_node { -- cgit v1.2.3 From 039739b2da0ce8496f6e8d38127c0b3793607afa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 07:58:59 -0700 Subject: Defer test 26 until much later (to test 55). Supporting embedded newlines in a macro invocation is going to be tricky with our current approach to lexing and parsing. Since this isn't really an important feature for us, we can defer this until more important things are resolved. With this test out of the way, tests 27 through 31 are passing. --- tests/026-define-func-extra-newlines.c | 6 ------ tests/055-define-func-extra-newlines.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) delete mode 100644 tests/026-define-func-extra-newlines.c create mode 100644 tests/055-define-func-extra-newlines.c diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c deleted file mode 100644 index 0d837405309..00000000000 --- a/tests/026-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c new file mode 100644 index 00000000000..0d837405309 --- /dev/null +++ b/tests/055-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) -- cgit v1.2.3 From c0607d573e04846a23c3162901aabd7fc40ebc61 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:01:42 -0700 Subject: Check active expansions before expanding a function-like macro invocation. With this fix, test 32 no longer recurses infinitely, but now passes. --- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 60eaf215b8b..a2bff6e0ada 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -654,11 +654,6 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { @@ -666,6 +661,11 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } + /* For function-like macros return 1 for further processing. */ + if (macro->is_function) { + return 1; + } + _string_list_push (parser->active, identifier); _glcpp_parser_print_expanded_token_list (parser, macro->replacements); -- cgit v1.2.3 From 0197e9b64f0e64a617537c5ad1465b4a8706fe1c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:05:19 -0700 Subject: Change macro expansion to append onto token lists rather than printing directly. This doesn't change any functionality here, but will allow us to make future changes that were not possible with direct printing. Specifically, we need to expand macros within macro arguments before performing argument substitution. And *that* expansion cannot result in immediate printing. --- glcpp-parse.y | 193 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 73 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index a2bff6e0ada..e25cfa92142 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -99,6 +99,11 @@ static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result); + static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -470,55 +475,6 @@ _token_create_ival (void *ctx, int type, int ival) return token; } -void -_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) -{ - if (token->type < 256) { - printf ("%c", token->type); - return; - } - - switch (token->type) { - case IDENTIFIER: - case OTHER: - printf ("%s", token->value.str); - break; - case SPACE: - printf (" "); - break; - case LEFT_SHIFT: - printf ("<<"); - break; - case RIGHT_SHIFT: - printf (">>"); - break; - case LESS_OR_EQUAL: - printf ("<="); - break; - case GREATER_OR_EQUAL: - printf (">="); - break; - case EQUAL: - printf ("=="); - break; - case NOT_EQUAL: - printf ("!="); - break; - case AND: - printf ("&&"); - break; - case OR: - printf ("||"); - break; - case PASTE: - printf ("##"); - break; - default: - fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); - break; - } -} - token_list_t * _token_list_create (void *ctx) { @@ -584,6 +540,67 @@ _token_list_trim_trailing_space (token_list_t *list) } } +static void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case SPACE: + printf (" "); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + +static void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + _token_print (node->token); +} + void yyerror (void *scanner, const char *error) { @@ -624,23 +641,26 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -/* Print a non-macro token, or the expansion of an object-like macro. +/* Appends onto 'expansion' a non-macro token or the expansion of an + * object-like macro. * - * Returns 0 if this token is completely printed. + * Returns 0 if this token is completely processed. * * Returns 1 in the case that 'token' is a function-like macro that * needs further expansion. */ static int -_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, - token_t *token) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; + token_list_t *expansion; /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _glcpp_parser_print_token (parser, token); + _token_list_append (result, token); return 0; } @@ -648,16 +668,16 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just print directly. */ + /* Not a macro, so just append. */ if (macro == NULL) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } @@ -667,8 +687,9 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); _string_list_pop (parser->active); return 0; @@ -770,15 +791,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ static void -_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, - token_node_t **node_ret) +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; - token_list_t *expanded; + token_list_t *substituted; token_node_t *i, *j; int parameter_index; @@ -796,7 +818,7 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - printf ("%s", identifier); + _token_list_append (result, node->token); return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", @@ -809,7 +831,6 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) { @@ -821,7 +842,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - expanded = _token_list_create (arguments); + /* Perform argument substitution on the replacement list. */ + substituted = _token_list_create (arguments); for (i = macro->replacements->head; i; i = i->next) { if (i->token->type == IDENTIFIER && @@ -834,36 +856,61 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->token); + _token_list_append (substituted, j->token); } } else { - _token_list_append (expanded, i->token); + _token_list_append (substituted, i->token); } } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, expanded); + _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); talloc_free (arguments); } +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) + { + _glcpp_parser_expand_function_onto (parser, &node, + result); + } + } +} + void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { + token_list_t *expanded; token_node_t *node; function_status_t function_status; if (list == NULL) return; - _token_list_trim_trailing_space (list); + expanded = _token_list_create (parser); - for (node = list->head; node; node = node->next) { - if (_glcpp_parser_print_expanded_token (parser, node->token)) - _glcpp_parser_print_expanded_function (parser, &node); - } + _glcpp_parser_expand_token_list_onto (parser, list, expanded); + + _token_list_trim_trailing_space (expanded); + + _token_list_print (expanded); + + talloc_free (expanded); } void -- cgit v1.2.3 From d5cd40343f4a83d3270cb87ef38e85dcb9682e8c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:09:29 -0700 Subject: Expand macro arguments before performing argument substitution. As required by the C99 specification of the preprocessor. With this fix, tests 33 through 36 now pass. --- glcpp-parse.y | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index e25cfa92142..3b736f8e64d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -854,10 +854,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_list_t *argument; argument = _argument_list_member_at (arguments, parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (substituted, j->token); - } + /* Before substituting, we expand the argument + * tokens. */ + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); } else { _token_list_append (substituted, i->token); } -- cgit v1.2.3 From b1ae61a2ee1bf2ba733dca417b0268b1106d83cf Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:10:38 -0700 Subject: Fix a typo in a comment. Always better to use proper grammar in our grammar. --- glcpp-parse.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 3b736f8e64d..5b792a976e6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -85,7 +85,7 @@ _token_create_ival (void *ctx, int type, int ival); static token_list_t * _token_list_create (void *ctx); -/* Note: This function add a talloc_reference() to token. +/* Note: This function adds a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -- cgit v1.2.3 From c9dcc08d4512370b6fef6370afb8bcdb0ecd9292 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:11:08 -0700 Subject: README: Document some known limitations. None of these are fundamental---just a few things that haven't been implemented yet. --- README | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README b/README index ba833a49ffb..f0f64c2644a 100644 --- a/README +++ b/README @@ -12,3 +12,15 @@ preprocessors". To fill in these details, I've been using the C99 standard (for which I had a convenient copy) as available from: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + +Known limitations +----------------- +Macro invocations cannot include embedded newlines. + +The __LINE__, __FILE__, and __VERSION__ macros are not yet supported. + +The argument of the 'defined' operator cannot yet include enclosing +parentheses. + +The #error, #pragma, #extension, #version, and #line macros are not +yet supported. -- cgit v1.2.3 From ec4ada01c01338ae1deab634cf62f24344bdbd3a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:15:49 -0700 Subject: Prevent unexpanded macros from being expanded again in the future. With this fix, tests 37 - 39 now pass. --- glcpp-parse.y | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b792a976e6..ec104330631 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -676,8 +676,17 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) { - _token_list_append (result, token); + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); return 0; } -- cgit v1.2.3 From 63909fc19654ddb3ef339bcceed9cbf6e6a057bc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:16:56 -0700 Subject: Remove some stale token types. All the code referencing these was removed some time ago. --- glcpp-parse.y | 3 --- 1 file changed, 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec104330631..04e78b1826f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,9 +132,6 @@ glcpp_parser_lex (glcpp_parser_t *parser); %type preprocessing_token %type pp_tokens replacement_list text_line - /* Stale stuff just to allow code to compile. */ -%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO - %% input: -- cgit v1.2.3 From ce540f2571a449a3620bd3672bfb93b39cef71e1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:25:44 -0700 Subject: Rename identifier from 'i' to 'node'. Now that we no longer have nested for loops with 'i' and 'j' we can use the 'node' that we already have. --- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 04e78b1826f..5f59b5b006f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -807,7 +807,6 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument_list_t *arguments; function_status_t status; token_list_t *substituted; - token_node_t *i, *j; int parameter_index; node = *node_ret; @@ -851,10 +850,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, /* Perform argument substitution on the replacement list. */ substituted = _token_list_create (arguments); - for (i = macro->replacements->head; i; i = i->next) { - if (i->token->type == IDENTIFIER && + for (node = macro->replacements->head; node; node = node->next) + { + if (node->token->type == IDENTIFIER && _string_list_contains (macro->parameters, - i->token->value.str, + node->token->value.str, ¶meter_index)) { token_list_t *argument; @@ -866,7 +866,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument, substituted); } else { - _token_list_append (substituted, i->token); + _token_list_append (substituted, node->token); } } -- cgit v1.2.3 From ad0dee6bb0f197b9addb45f38e8843d6a504723c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:04:50 -0700 Subject: Implement token pasting. Which makes test 40 now pass. --- glcpp-parse.y | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5f59b5b006f..330d3ab3bc4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -586,6 +586,86 @@ _token_print (token_t *token) } } +/* Change 'token' into a new token formed by pasting 'other'. */ +static void +_token_paste (token_t *token, token_t *other) +{ + /* A very few single-character punctuators can be combined + * with another to form a multi-character punctuator. */ + switch (token->type) { + case '<': + if (other->type == '<') { + token->type = LEFT_SHIFT; + token->value.ival = LEFT_SHIFT; + return; + } else if (other->type == '=') { + token->type = LESS_OR_EQUAL; + token->value.ival = LESS_OR_EQUAL; + return; + } + break; + case '>': + if (other->type == '>') { + token->type = RIGHT_SHIFT; + token->value.ival = RIGHT_SHIFT; + return; + } else if (other->type == '=') { + token->type = GREATER_OR_EQUAL; + token->value.ival = GREATER_OR_EQUAL; + return; + } + break; + case '=': + if (other->type == '=') { + token->type = EQUAL; + token->value.ival = EQUAL; + return; + } + break; + case '!': + if (other->type == '=') { + token->type = NOT_EQUAL; + token->value.ival = NOT_EQUAL; + return; + } + break; + case '&': + if (other->type == '&') { + token->type = AND; + token->value.ival = AND; + return; + } + break; + case '|': + if (other->type == '|') { + token->type = OR; + token->value.ival = OR; + return; + } + break; + } + + /* Two string-valued tokens can usually just be mashed + * together. + * + * XXX: Since our 'OTHER' case is currently so loose, this may + * allow some things thruogh that should be treated as + * errors. */ + if ((token->type == IDENTIFIER || token->type == OTHER) && + (other->type == IDENTIFIER || other->type == OTHER)) + { + token->value.str = talloc_strdup_append (token->value.str, + other->value.str); + return; + } + + printf ("Error: Pasting \""); + _token_print (token); + printf ("\" and \""); + _token_print (other); + printf ("\" does not give a valid preprocessing token.\n"); +} + static void _token_list_print (token_list_t *list) { @@ -870,6 +950,43 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } } + /* After argument substitution, and before further expansion + * below, implement token pasting. */ + + node = substituted->head; + while (node) + { + token_node_t *next_non_space; + + /* Look ahead for a PASTE token, skipping space. */ + next_non_space = node->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) + break; + + if (next_non_space->token->type != PASTE) { + node = next_non_space; + continue; + } + + /* Now find the next non-space token after the PASTE. */ + next_non_space = next_non_space->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) { + fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); + exit (1); + } + + _token_paste (node->token, next_non_space->token); + node->next = next_non_space->next; + + node = node->next; + } + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); -- cgit v1.2.3 From 8fed1cddae8b024972d0c08f120bfd0292cb9cca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:12 -0700 Subject: stash --- glcpp-lex.l | 40 +++++++++++++++++ glcpp-parse.y | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 176 insertions(+), 5 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 516f42dee32..97f01d06368 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -62,11 +62,47 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return HASH_UNDEF; } +{HASH}if { + yyextra->space_tokens = 0; + return HASH_IF; +} + +{HASH}elif { + yyextra->space_tokens = 0; + return HASH_ELIF; +} + +{HASH}else { + yyextra->space_tokens = 0; + return HASH_ELSE; +} + +{HASH}endif { + yyextra->space_tokens = 0; + return HASH_ENDIF; +} + {HASH} { yyextra->space_tokens = 0; return HASH; } +{DECIMAL_INTEGER} { + yylval.ival = strtoll (yytext, NULL, 10); + return INTEGER; +} + +{OCTAL_INTEGER} { + yylval.ival = strtoll (yytext + 1, NULL, 8); + return INTEGER; +} + +{HEXADECIMAL_INTEGER} { + yylval.ival = strtoll (yytext + 2, NULL, 16); + return INTEGER; +} + + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; @@ -108,6 +144,10 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return PASTE; } +"defined" { + return DEFINED; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/glcpp-parse.y b/glcpp-parse.y index 330d3ab3bc4..58e1e655fdb 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,6 +25,7 @@ #include #include #include +#include #include "glcpp.h" @@ -124,27 +125,46 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE -%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type punctuator SPACE +%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token PASTE +%type expression INTEGER punctuator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' '-' +%left '*' '/' '%' +%right UNARY %% input: /* empty */ | input line { - printf ("\n"); + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + printf ("\n"); + } } ; line: control_line | text_line { - _glcpp_parser_print_expanded_token_list (parser, $1); + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + _glcpp_parser_print_expanded_token_list (parser, $1); + } talloc_free ($1); } | HASH non_directive @@ -171,9 +191,114 @@ control_line: } talloc_free ($2); } +| HASH_IF expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| HASH_IFDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro != NULL); + } +| HASH_IFNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro == NULL); + } +| HASH_ELIF expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); + } +| HASH_ELSE NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "else", 1); + } +| HASH_ENDIF NEWLINE { + _glcpp_parser_skip_stack_pop (parser); + } | HASH NEWLINE ; +expression: + INTEGER { + $$ = $1; + } +| expression OR expression { + $$ = $1 || $3; + } +| expression AND expression { + $$ = $1 && $3; + } +| expression '|' expression { + $$ = $1 | $3; + } +| expression '^' expression { + $$ = $1 ^ $3; + } +| expression '&' expression { + $$ = $1 & $3; + } +| expression NOT_EQUAL expression { + $$ = $1 != $3; + } +| expression EQUAL expression { + $$ = $1 == $3; + } +| expression GREATER_OR_EQUAL expression { + $$ = $1 >= $3; + } +| expression LESS_OR_EQUAL expression { + $$ = $1 <= $3; + } +| expression '>' expression { + $$ = $1 > $3; + } +| expression '<' expression { + $$ = $1 < $3; + } +| expression RIGHT_SHIFT expression { + $$ = $1 >> $3; + } +| expression LEFT_SHIFT expression { + $$ = $1 << $3; + } +| expression '-' expression { + $$ = $1 - $3; + } +| expression '+' expression { + $$ = $1 + $3; + } +| expression '%' expression { + $$ = $1 % $3; + } +| expression '/' expression { + $$ = $1 / $3; + } +| expression '*' expression { + $$ = $1 * $3; + } +| '!' expression %prec UNARY { + $$ = ! $2; + } +| '~' expression %prec UNARY { + $$ = ~ $2; + } +| '-' expression %prec UNARY { + $$ = - $2; + } +| '+' expression %prec UNARY { + $$ = + $2; + } +| DEFINED IDENTIFIER %prec UNARY { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + if (macro) + $$ = 1; + else + $$ = 0; + } +| '(' expression ')' { + $$ = $2; + } +; + identifier_list: IDENTIFIER { $$ = _string_list_create (parser); @@ -219,6 +344,9 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } +| INTEGER { + $$ = _token_create_ival (parser, INTEGER, $1); + } | punctuator { $$ = _token_create_ival (parser, $1, $1); } @@ -546,6 +674,9 @@ _token_print (token_t *token) } switch (token->type) { + case INTEGER: + printf ("%" PRIxMAX, token->value.ival); + break; case IDENTIFIER: case OTHER: printf ("%s", token->value.str); -- cgit v1.2.3 From f6914fd37b2b66d7be1ba0c31450d89d1785ccce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:57 -0700 Subject: Implement #if and friends. With this change, tests 41 through 49 all pass. (The defined operator appears to be somehow broken so that test 50 doesn't pass yet.) --- glcpp.h | 2 +- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/glcpp-test | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/glcpp.h b/glcpp.h index 21db918cdce..36ab0e7ca5c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -49,7 +49,7 @@ typedef struct token_list token_list_t; typedef union YYSTYPE { - int ival; + intmax_t ival; char *str; string_list_t *string_list; token_t *token; diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220fd..833ea03882a 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d5..34f0f95140e 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/glcpp-test b/tests/glcpp-test index 63041552104..bf88d4462e1 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -6,5 +6,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done -- cgit v1.2.3 From 16c1e980e2e3c8852ce9bea85afe094c24e420fa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:35:34 -0700 Subject: Fix lexing of "defined" as an operator, not an identifier. Simply need to move the rule for IDENTIFIER to be after "defined" and everything is happy. With this change, tests 50 through 53 all pass now. --- glcpp-lex.l | 11 +++++------ tests/053-if-divide-and-shift.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 97f01d06368..d6b7726d36d 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -102,12 +102,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return INTEGER; } - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - "<<" { return LEFT_SHIFT; } @@ -148,6 +142,11 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return DEFINED; } +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c index ddc1573ab26..d24c54a88d1 100644 --- a/tests/053-if-divide-and-shift.c +++ b/tests/053-if-divide-and-shift.c @@ -13,4 +13,3 @@ failure_3 #else success_3 #endif - -- cgit v1.2.3 From 8e82fcb070d5fae0ec2c763cee4cea225b459664 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 11:15:21 -0700 Subject: Implement (and test) support for macro expansion within conditional expressions. To do this we have split the existing "HASH_IF expression" into two productions: First is HASH_IF pp_tokens which simply constructs a list of tokens. Then, with that resulting token list, we first evaluate all DEFINED operator tokens, then expand all macros, and finally start lexing from the resulting token list. This brings us to the second production, IF_EXPANDED expression This final production works just like our previous "HASH_IF expression", evaluating a constant integer expression. The new test (54) added for this case now passes. --- glcpp-parse.y | 155 ++++++++++++++++++++++++++++++++++++++------- glcpp.h | 2 + tests/054-if-with-macros.c | 34 ++++++++++ 3 files changed, 169 insertions(+), 22 deletions(-) create mode 100644 tests/054-if-with-macros.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 58e1e655fdb..cce8a70156f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,6 +96,10 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list); + static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); @@ -120,14 +124,17 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); static int glcpp_parser_lex (glcpp_parser_t *parser); +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + %} %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE -%type expression INTEGER punctuator SPACE +%type expression INTEGER operator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token @@ -148,28 +155,39 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line { +| input line +; + +line: + control_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { printf ("\n"); } } -; - -line: - control_line | text_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); } talloc_free ($1); } +| expanded_line | HASH non_directive ; +expanded_line: + IF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| ELIF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "elif", $2); + } +; + control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); @@ -191,8 +209,17 @@ control_line: } talloc_free ($2); } -| HASH_IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); +| HASH_IF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); @@ -204,8 +231,17 @@ control_line: talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } -| HASH_ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); +| HASH_ELIF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { _glcpp_parser_skip_stack_change_if (parser, "else", 1); @@ -286,14 +322,6 @@ expression: | '+' expression %prec UNARY { $$ = + $2; } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } | '(' expression ')' { $$ = $2; } @@ -347,7 +375,7 @@ preprocessing_token: | INTEGER { $$ = _token_create_ival (parser, INTEGER, $1); } -| punctuator { +| operator { $$ = _token_create_ival (parser, $1, $1); } | OTHER { @@ -358,7 +386,7 @@ preprocessing_token: } ; -punctuator: +operator: '[' { $$ = '['; } | ']' { $$ = ']'; } | '(' { $$ = '('; } @@ -389,6 +417,7 @@ punctuator: | ';' { $$ = ';'; } | ',' { $$ = ','; } | PASTE { $$ = PASTE; } +| DEFINED { $$ = DEFINED; } ; %% @@ -830,6 +859,9 @@ glcpp_parser_create (void) parser->skip_stack = NULL; + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + return parser; } @@ -849,6 +881,39 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +/* Replace any occurences of DEFINED tokens in 'list' with either a + * '0' or '1' INTEGER token depending on whether the next token in the + * list is defined or not. */ +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node, *next; + string_list_t *macro; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + if (node->token->type != DEFINED) + continue; + next = node->next; + while (next && next->token->type == SPACE) + next = next->next; + if (next == NULL || next->token->type != IDENTIFIER) { + fprintf (stderr, "Error: operator \"defined\" requires an identifier\n"); + exit (1); + } + macro = hash_table_find (parser->defines, + next->token->value.str); + + node->token->type = INTEGER; + node->token->value.ival = (macro != NULL); + node->next = next->next; + } +} + + /* Appends onto 'expansion' a non-macro token or the expansion of an * object-like macro. * @@ -1206,7 +1271,53 @@ _define_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + token_node_t *node; + int ret; + + if (parser->lex_from_list == NULL) + return glcpp_lex (parser->scanner); + + node = parser->lex_from_node; + + if (node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + return NEWLINE; + } + + yylval = node->token->value; + ret = node->token->type; + + parser->lex_from_node = node->next; + + return ret; +} + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + assert (parser->lex_from_list == NULL); + + /* Copy list, eliminating any space tokens. */ + parser->lex_from_list = _token_list_create (parser); + + for (node = list->head; node; node = node->next) { + if (node->token->type == SPACE) + continue; + _token_list_append (parser->lex_from_list, node->token); + } + + talloc_free (list); + + parser->lex_from_node = parser->lex_from_list->head; + + /* It's possible the list consisted of nothing but whitespace. */ + if (parser->lex_from_node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + } } static void diff --git a/glcpp.h b/glcpp.h index 36ab0e7ca5c..e5be1a6cd62 100644 --- a/glcpp.h +++ b/glcpp.h @@ -129,6 +129,8 @@ struct glcpp_parser { string_list_t *active; int space_tokens; skip_node_t *skip_stack; + token_list_t *lex_from_list; + token_node_t *lex_from_node; }; glcpp_parser_t * diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 00000000000..3da79a0d96e --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif -- cgit v1.2.3 From 0324cad796b7a68634a729719f08fcbb5bbd04cc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:53:05 -0700 Subject: All macro lookups should be of type macro_t, not string_list_t. This is what I get for using a non-type-safe hash-table implementation. --- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index cce8a70156f..a809ebf3af5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -199,7 +199,7 @@ control_line: _define_function_macro (parser, $2, $4, $6); } | HASH_UNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); if (macro) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -222,12 +222,12 @@ control_line: glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro != NULL); } | HASH_IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } @@ -889,7 +889,7 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node, *next; - string_list_t *macro; + macro_t *macro; if (list == NULL) return; -- cgit v1.2.3 From 95951ea7bb8728cf54ae4136cb59d0af9e8a06bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:57:10 -0700 Subject: Treat newlines as space when invoking a function-like macro invocation. This adds three new pieces of state to the parser, (is_control_line, newline_as_space, and paren_count), and a large amount of messy code. I'd definitely like to see a cleaner solution for this. With this fix, the "define-func-extra-newlines" now passes so we put it back to test #26 where it was originally (lately it has been known as test #55). Also, we tweak test 25 slightly. Previously this test was ending a file function-like macro name that was not actually a macro (not followed by a left parenthesis). As is, this fix was making that test fail because the text_line production expects to see a terminating NEWLINE, but that NEWLINE is now getting turned into a SPACE here. This seems unlikely to be a problem in the wild, (function macros being used in a non-macro sense seems rare enough---but more than likely they won't happen at the end of a file). Still, we document this shortcoming in the README. --- README | 4 +++ glcpp-parse.y | 61 ++++++++++++++++++++++++++++++++-- glcpp.h | 3 ++ tests/025-func-macro-as-non-macro.c | 2 +- tests/026-define-func-extra-newlines.c | 6 ++++ tests/055-define-func-extra-newlines.c | 6 ---- 6 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 tests/026-define-func-extra-newlines.c delete mode 100644 tests/055-define-func-extra-newlines.c diff --git a/README b/README index f0f64c2644a..ab42a3ffe12 100644 --- a/README +++ b/README @@ -24,3 +24,7 @@ parentheses. The #error, #pragma, #extension, #version, and #line macros are not yet supported. + +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is). \ No newline at end of file diff --git a/glcpp-parse.y b/glcpp-parse.y index a809ebf3af5..1346b65aff6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -856,6 +856,9 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; parser->skip_stack = NULL; @@ -1274,8 +1277,62 @@ glcpp_parser_lex (glcpp_parser_t *parser) token_node_t *node; int ret; - if (parser->lex_from_list == NULL) - return glcpp_lex (parser->scanner); + if (parser->lex_from_list == NULL) { + ret = glcpp_lex (parser->scanner); + + /* XXX: This ugly block of code exists for the sole + * purpose of converting a NEWLINE token into a SPACE + * token, but only in the case where we have seen a + * function-like macro name, but have not yet seen its + * closing parenthesis. + * + * There's perhaps a more compact way to do this with + * mid-rule actions in the grammar. + * + * I'm definitely not pleased with the complexity of + * this code here. + */ + if (parser->newline_as_space) + { + if (ret == '(') { + parser->paren_count++; + } else if (ret == ')') { + parser->paren_count--; + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } else if (ret == NEWLINE) { + ret = SPACE; + } else if (ret != SPACE) { + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } + } + else if (parser->in_control_line) + { + if (ret == NEWLINE) + parser->in_control_line = 0; + } + else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC || + ret == HASH_UNDEF || ret == HASH_IF || + ret == HASH_IFDEF || ret == HASH_IFNDEF || + ret == HASH_ELIF || ret == HASH_ELSE || + ret == HASH_ENDIF || ret == HASH) + { + parser->in_control_line = 1; + } + else if (ret == IDENTIFIER) + { + macro_t *macro; + macro = hash_table_find (parser->defines, + yylval.str); + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } + } + + return ret; + } node = parser->lex_from_node; diff --git a/glcpp.h b/glcpp.h index e5be1a6cd62..5c8c304a9ca 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,6 +128,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; + int newline_as_space; + int in_control_line; + int paren_count; skip_node_t *skip_stack; token_list_t *lex_from_list; token_node_t *lex_from_node; diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9dd..b433671d1bf 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c new file mode 100644 index 00000000000..0d837405309 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c deleted file mode 100644 index 0d837405309..00000000000 --- a/tests/055-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) -- cgit v1.2.3 From 7db2402a8009772a3f10d19cfc7f30be9ee79295 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 17:01:57 -0700 Subject: Add support (and test) for an object-to-function chain with the parens in the content. That is, the following case: #define foo(x) (x) #define bar bar(baz) which now works with this (ugly) commit. I definitely want to come up with something cleaner than this. --- glcpp-parse.y | 65 ++++++++++++++++------ .../055-define-chain-obj-to-func-parens-in-text.c | 3 + 2 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 1346b65aff6..abdcd1ed5d8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -926,9 +926,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. */ static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1075,10 +1075,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static function_status_t +_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1103,7 +1103,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return; + return FUNCTION_NOT_A_FUNCTION; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1112,7 +1112,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return; + return FUNCTION_STATUS_SUCCESS; } if (_argument_list_length (arguments) != @@ -1123,7 +1123,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return; + exit (1); } /* Perform argument substitution on the replacement list. */ @@ -1191,6 +1191,8 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); + + return FUNCTION_STATUS_SUCCESS; } static void @@ -1199,19 +1201,50 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; + token_list_t *intermediate, *list_orig = list; + int i, need_rescan = 0; if (list == NULL) return; - for (node = list->head; node; node = node->next) - { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) + intermediate = _token_list_create (parser); + + /* XXX: The two-pass expansion here is really ugly. The + * problem this is solving is that we can expand a macro into + * a function-like macro name, and then we need to recognize + * that as a function-like macro, but perhaps the parentheses + * and arguments aren't on the token list yet, (since they are + * in the actual content so they are part of what we are + * expanding. + * + * This ugly hack works, but is messy, fragile, and hard to + * maintain. I think a cleaner solution would separate the + * notions of expanding and appending and avoid this problem + * altogether. + */ + + for (i = 0; i < 2; i++) { + if (i == 1) { + list = intermediate; + intermediate = _token_list_create (parser); + } + for (node = list->head; node; node = node->next) { - _glcpp_parser_expand_function_onto (parser, &node, - result); + if (_expand_token_onto (parser, node->token, + intermediate)) + { + if (_expand_function_onto (parser, &node, + intermediate)) + { + need_rescan = 1; + } + } } + if (list != list_orig) + talloc_free (list); } + + _token_list_append_list (result, intermediate); } void diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c b/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 00000000000..00f2c2346d6 --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() -- cgit v1.2.3 From dd7490093d84ce74a99922c3544b51c3f5d43345 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 10:12:33 -0700 Subject: Avoid treating an expanded comma as an argument separator. That is, a function-like invocation foo(x) is valid as a single-argument invocation even if 'x' is a macro that expands into a value with a comma. Add a new COMMA_FINAL token type to handle this, and add a test for this case, (which passes). --- glcpp-parse.y | 18 ++++++++++++++++-- tests/056-macro-argument-with-comma.c | 4 ++++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/056-macro-argument-with-comma.c diff --git a/glcpp-parse.y b/glcpp-parse.y index abdcd1ed5d8..b2684d06d98 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -740,6 +740,9 @@ _token_print (token_t *token) case PASTE: printf ("##"); break; + case COMMA_FINAL: + printf (","); + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -936,7 +939,18 @@ _expand_token_onto (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_list_append (result, token); + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. */ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } return 0; } diff --git a/tests/056-macro-argument-with-comma.c b/tests/056-macro-argument-with-comma.c new file mode 100644 index 00000000000..58701d1f25b --- /dev/null +++ b/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) -- cgit v1.2.3 From a65cf7b1d29e98ef3bf31051df8a06cb394d131f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 11:55:36 -0700 Subject: Make two list-processing functions do nothing with an empty list. This just makes these functions easier to understand all around. In the case of _token_list_append_list this is an actual bug fix, (where append an empty list onto a non-empty list would previously scramble the tail pointer of the original list). --- glcpp-parse.y | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b2684d06d98..ba79a611f6e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -666,6 +666,9 @@ _token_list_append (token_list_t *list, token_t *token) void _token_list_append_list (token_list_t *list, token_list_t *tail) { + if (tail == NULL || tail->head == NULL) + return; + if (list->head == NULL) { list->head = tail->head; } else { @@ -1218,7 +1221,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *intermediate, *list_orig = list; int i, need_rescan = 0; - if (list == NULL) + if (list == NULL || list->head == NULL) return; intermediate = _token_list_create (parser); -- cgit v1.2.3 From a19297b26e971e5a9dbe00b4254931505da4b5a9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 13:29:19 -0700 Subject: Provide support for empty arguments in macro invocations. For this we always add a new argument to the argument list as soon as possible, without waiting until we see some argument token. This does mean we need to take some extra care when comparing the number of arguments with the number of expected arguments. In addition to matching numbers, we also support one (empty) argument when zero arguments are expected. Add a test case here for this, which does pass. --- glcpp-parse.y | 20 +++++++++++--------- tests/057-empty-arguments.c | 6 ++++++ 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 tests/057-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index ba79a611f6e..3e0a96528b4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1044,7 +1044,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) last = node; node = node->next; - argument = NULL; + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') @@ -1064,18 +1065,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { - if (argument) - _token_list_trim_trailing_space (argument); - argument = NULL; + _token_list_trim_trailing_space (argument); + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); } else { - if (argument == NULL) { + if (argument->head == NULL) { /* Don't treat initial whitespace as * part of the arguement. */ if (node->token->type == SPACE) continue; - argument = _token_list_create (arguments); - _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } @@ -1132,8 +1131,11 @@ _expand_function_onto (glcpp_parser_t *parser, return FUNCTION_STATUS_SUCCESS; } - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) + if (! ((_argument_list_length (arguments) == + _string_list_length (macro->parameters)) || + (_string_list_length (macro->parameters) == 0 && + _argument_list_length (arguments) == 1 && + arguments->head->argument->head == NULL))) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", diff --git a/tests/057-empty-arguments.c b/tests/057-empty-arguments.c new file mode 100644 index 00000000000..6140232865d --- /dev/null +++ b/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) -- cgit v1.2.3 From 85b50e840d969c4d9ebcfcc3df1df7a95e07e34e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:01:18 -0700 Subject: Add placeholder tokens to support pasting with empty arguments. Along with a passing test to verify that this works. --- glcpp-parse.y | 36 +++++++++++++++++++++++++------ tests/058-token-pasting-empty-arguments.c | 5 +++++ 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 tests/058-token-pasting-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 3e0a96528b4..d587a4bf338 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -746,6 +746,9 @@ _token_print (token_t *token) case COMMA_FINAL: printf (","); break; + case PLACEHOLDER: + /* Nothing to print. */ + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -756,6 +759,17 @@ _token_print (token_t *token) static void _token_paste (token_t *token, token_t *other) { + /* Pasting a placeholder onto anything makes no change. */ + if (other->type == PLACEHOLDER) + return; + + /* When 'token' is a placeholder, just return contents of 'other'. */ + if (token->type == PLACEHOLDER) { + token->type = other->type; + token->value = other->value; + return; + } + /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { @@ -1159,10 +1173,20 @@ _expand_function_onto (glcpp_parser_t *parser, argument = _argument_list_member_at (arguments, parameter_index); /* Before substituting, we expand the argument - * tokens. */ - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + * tokens, or append a placeholder token for + * an empty argument. */ + if (argument->head) { + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); + } else { + token_t *new_token; + + new_token = _token_create_ival (substituted, + PLACEHOLDER, + PLACEHOLDER); + _token_list_append (substituted, new_token); + } } else { _token_list_append (substituted, node->token); } @@ -1196,7 +1220,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - exit (1); + return FUNCTION_STATUS_SUCCESS; } _token_paste (node->token, next_non_space->token); diff --git a/tests/058-token-pasting-empty-arguments.c b/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 00000000000..8ac260c76b6 --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) -- cgit v1.2.3 From 050e3ded1ea05cfe336dd0cd20212d17d7960c9e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:36:29 -0700 Subject: Implement token pasting of integers. To do this correctly, we change the lexer to lex integers as string values, (new token type of INTEGER_STRING), and only convert to integer values when evaluating an expression value. Add a new test case for this, (which does pass now). --- Makefile | 2 +- glcpp-lex.l | 12 ++++++------ glcpp-parse.y | 32 ++++++++++++++++++++++---------- tests/059-token-pasting-integer.c | 4 ++++ 4 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 tests/059-token-pasting-integer.c diff --git a/Makefile b/Makefile index 88116128f85..0c06aa880fb 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CFLAGS = -g override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o - gcc -o $@ -ltalloc $^ + gcc -o $@ -ltalloc -lm $^ %.c %.h: %.y bison --debug --defines=$*.h --output=$*.c $^ diff --git a/glcpp-lex.l b/glcpp-lex.l index d6b7726d36d..70d47d24975 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -88,18 +88,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } {DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } "<<" { diff --git a/glcpp-parse.y b/glcpp-parse.y index d587a4bf338..5b2d0d3927a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,10 +132,10 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING NEWLINE OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE -%type IDENTIFIER OTHER +%type IDENTIFIER INTEGER_STRING OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -253,7 +253,16 @@ control_line: ; expression: - INTEGER { + INTEGER_STRING { + if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) { + $$ = strtoll ($1 + 2, NULL, 16); + } else if ($1[0] == '0') { + $$ = strtoll ($1, NULL, 8); + } else { + $$ = strtoll ($1, NULL, 10); + } + } +| INTEGER { $$ = $1; } | expression OR expression { @@ -372,8 +381,8 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } -| INTEGER { - $$ = _token_create_ival (parser, INTEGER, $1); +| INTEGER_STRING { + $$ = _token_create_str (parser, INTEGER_STRING, $1); } | operator { $$ = _token_create_ival (parser, $1, $1); @@ -710,6 +719,7 @@ _token_print (token_t *token) printf ("%" PRIxMAX, token->value.ival); break; case IDENTIFIER: + case INTEGER_STRING: case OTHER: printf ("%s", token->value.str); break; @@ -828,11 +838,13 @@ _token_paste (token_t *token, token_t *other) /* Two string-valued tokens can usually just be mashed * together. * - * XXX: Since our 'OTHER' case is currently so loose, this may - * allow some things thruogh that should be treated as - * errors. */ - if ((token->type == IDENTIFIER || token->type == OTHER) && - (other->type == IDENTIFIER || other->type == OTHER)) + * XXX: This isn't actually legitimate. Several things here + * should result in a diagnostic since the result cannot be a + * valid, single pre-processing token. For example, pasting + * "123" and "abc" is not legal, but we don't catch that + * here. */ + if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && + (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { token->value.str = talloc_strdup_append (token->value.str, other->value.str); diff --git a/tests/059-token-pasting-integer.c b/tests/059-token-pasting-integer.c new file mode 100644 index 00000000000..37b895a4237 --- /dev/null +++ b/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) -- cgit v1.2.3 From baa17c87485b5e776ec142844f5df38a3df9dccc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:53:51 -0700 Subject: Remove blank lines from output files before comparing. Recently I'm seeing cases where "gcc -E" mysteriously omits blank lines, (even though it prints the blank lines in other very similar cases). Rather than trying to decipher and imitate this, just get rid of the blank lines. This approach with sed to kill the lines before the diff is better than "diff -B" since when there is an actual difference, the presence of blank lines won't make the diff harder to read. --- .gitignore | 1 + tests/glcpp-test | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index d67bd38c93c..b88f0cc75c7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ glcpp-parse.h *~ tests/*.expected tests/*.gcc +tests/*.glcpp tests/*.out diff --git a/tests/glcpp-test b/tests/glcpp-test index bf88d4462e1..92c994979a9 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -3,8 +3,9 @@ set -e for test in *.c; do echo "Testing $test" - ../glcpp < $test > $test.out + ../glcpp < $test > $test.glcpp + grep -v '^$' < $test.glcpp > $test.out || true gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out + grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true + diff -u $test.expected $test.out done -- cgit v1.2.3 From 95ec433d59be234cf2695ae091cee4ace3314d21 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:00:43 -0700 Subject: Revert "Add support for an object-to-function chain with the parens in the content." This reverts commit 7db2402a8009772a3f10d19cfc7f30be9ee79295 It doesn't revert the new test case from that commit, just the extremely ugly second-pass implementation. --- glcpp-parse.y | 65 +++++++++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b2d0d3927a..f4cb72a133f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -958,9 +958,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. */ static int -_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1117,10 +1117,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static function_status_t -_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static void +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1145,7 +1145,7 @@ _expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return FUNCTION_NOT_A_FUNCTION; + return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1154,7 +1154,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return FUNCTION_STATUS_SUCCESS; + return; } if (! ((_argument_list_length (arguments) == @@ -1168,7 +1168,7 @@ _expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - exit (1); + return; } /* Perform argument substitution on the replacement list. */ @@ -1246,8 +1246,6 @@ _expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); - - return FUNCTION_STATUS_SUCCESS; } static void @@ -1256,50 +1254,19 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; - token_list_t *intermediate, *list_orig = list; - int i, need_rescan = 0; if (list == NULL || list->head == NULL) return; - intermediate = _token_list_create (parser); - - /* XXX: The two-pass expansion here is really ugly. The - * problem this is solving is that we can expand a macro into - * a function-like macro name, and then we need to recognize - * that as a function-like macro, but perhaps the parentheses - * and arguments aren't on the token list yet, (since they are - * in the actual content so they are part of what we are - * expanding. - * - * This ugly hack works, but is messy, fragile, and hard to - * maintain. I think a cleaner solution would separate the - * notions of expanding and appending and avoid this problem - * altogether. - */ - - for (i = 0; i < 2; i++) { - if (i == 1) { - list = intermediate; - intermediate = _token_list_create (parser); - } - for (node = list->head; node; node = node->next) + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) { - if (_expand_token_onto (parser, node->token, - intermediate)) - { - if (_expand_function_onto (parser, &node, - intermediate)) - { - need_rescan = 1; - } - } + _glcpp_parser_expand_function_onto (parser, &node, + result); } - if (list != list_orig) - talloc_free (list); } - - _token_list_append_list (result, intermediate); } void -- cgit v1.2.3 From 9b519f9c7997e0ec02c66d39edc12912aebb9eca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:04:13 -0700 Subject: Stop interrupting the test suite at the first failure. This behavior was useful when starting the implementation over ("take-2") where the whole test suite was failing. This made it easy to focus on one test at a time and get each working. More recently, we got the whole suite working, so we don't need this feature anymore. And in the previous commit, we regressed a couple of tests, so it's nice to be able to see all the failures with a single run of the suite. --- tests/glcpp-test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/glcpp-test b/tests/glcpp-test index 92c994979a9..ba398af0d54 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,5 +1,4 @@ #!/bin/sh -set -e for test in *.c; do echo "Testing $test" -- cgit v1.2.3 From 3c93d397050bbeccb7809e53a425c860df947c45 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:17:46 -0700 Subject: Simplify calling conventions of functions under expand_token_list_onto. We previously had a confusing thing where _expand_token_onto would return a non-zero value to indicate that the caller should then call _expand_function_onto. It's much cleaner for _expand_token_onto to just do what's needed and call the necessary function. --- glcpp-parse.y | 159 +++++++++++++++++++++++++++------------------------------- 1 file changed, 74 insertions(+), 85 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index f4cb72a133f..9f97b2a282a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -948,81 +948,6 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, } } - -/* Appends onto 'expansion' a non-macro token or the expansion of an - * object-like macro. - * - * Returns 0 if this token is completely processed. - * - * Returns 1 in the case that 'token' is a function-like macro that - * needs further expansion. - */ -static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) -{ - const char *identifier; - macro_t *macro; - token_list_t *expansion; - - /* We only expand identifiers */ - if (token->type != IDENTIFIER) { - /* We change any COMMA into a COMMA_FINAL to prevent - * it being mistaken for an argument separator - * later. */ - if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); - } - return 0; - } - - /* Look up this identifier in the hash table. */ - identifier = token->value.str; - macro = hash_table_find (parser->defines, identifier); - - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return 0; - } - - /* Finally, don't expand this macro if we're already actively - * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { - /* We change the token type here from IDENTIFIER to - * OTHER to prevent any future expansion of this - * unexpanded token. */ - char *str; - token_t *new_token; - - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return 0; - } - - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); - _string_list_pop (parser->active); - - return 0; -} - typedef enum function_status { FUNCTION_STATUS_SUCCESS, @@ -1114,9 +1039,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) return FUNCTION_STATUS_SUCCESS; } -/* Prints the expansion of *node (consuming further tokens from the - * list as necessary). Upon return *node will be the last consumed - * node, such that further processing can continue with node->next. */ +/* Appends expansion of *node (consuming further tokens from the list + * as necessary) onto result. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. */ static void _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_node_t **node_ret, @@ -1232,7 +1158,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return FUNCTION_STATUS_SUCCESS; + return; } _token_paste (node->token, next_non_space->token); @@ -1248,6 +1174,74 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, talloc_free (arguments); } + +/* Appends the expansion of the token in *node onto result. + * Upon return *node will be the last consumed node, such that further + * processing can continue with node->next. */ +static void +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_node_t **node, + token_list_t *result) +{ + token_t *token = (*node)->token; + const char *identifier; + macro_t *macro; + token_list_t *expansion; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. */ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just append. */ + if (macro == NULL) { + _token_list_append (result, token); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); + return; + } + + if (macro->is_function) { + _glcpp_parser_expand_function_onto (parser, node, result); + } else { + _string_list_push (parser->active, identifier); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); + _string_list_pop (parser->active); + } +} + static void _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *list, @@ -1260,12 +1254,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) - { - _glcpp_parser_expand_function_onto (parser, &node, - result); - } + _glcpp_parser_expand_token_onto (parser, &node, result); } } -- cgit v1.2.3 From 681afbc855c86df8c3521ccdfadb7f16b9729baa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:06:02 -0700 Subject: Perform macro by replacing tokens in original list. We take the results of macro expansion and splice them into the original token list over which we are iterating. This makes it easy for function-like macro invocations to find their arguments since they are simply subsequent tokens on the list. This fixes the recently-introduced regressions (tests 55 and 56) and also passes new tests 60 and 61 introduced to strees this feature, (with macro-argument parentheses split between a macro value and the textual input). --- glcpp-parse.y | 278 +++++++++++++-------- .../060-left-paren-in-macro-right-paren-in-text.c | 3 + tests/061-define-chain-obj-to-func-multi.c | 5 + 3 files changed, 187 insertions(+), 99 deletions(-) create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c create mode 100644 tests/061-define-chain-obj-to-func-multi.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 9f97b2a282a..c89d7bf159c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -101,13 +101,12 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list); static void -_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, - token_list_t *list); +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list); static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -218,7 +217,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { @@ -240,7 +240,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { @@ -688,6 +689,22 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->non_space_tail = tail->non_space_tail; } +token_list_t * +_token_list_copy (void *ctx, token_list_t *other) +{ + token_list_t *copy; + token_node_t *node; + + if (other == NULL) + return NULL; + + copy = _token_list_create (ctx); + for (node = other->head; node; node = node->next) + _token_list_append (copy, node->token); + + return copy; +} + void _token_list_trim_trailing_space (token_list_t *list) { @@ -956,9 +973,12 @@ typedef enum function_status } function_status_t; /* Find a set of function-like macro arguments by looking for a - * balanced set of parentheses. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. + * balanced set of parentheses. + * + * When called, 'node' should be the opening-parenthesis token, (or + * perhaps preceeding SPACE tokens). Upon successful return *last will + * be the last consumed node, (corresponding to the closing right + * parenthesis). * * Return values: * @@ -976,13 +996,13 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) +_arguments_parse (argument_list_t *arguments, + token_node_t *node, + token_node_t **last) { token_list_t *argument; - token_node_t *node = *node_ret, *last; int paren_count; - last = node; node = node->next; /* Ignore whitespace before first parenthesis. */ @@ -992,13 +1012,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; - last = node; node = node->next; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); - for (paren_count = 1; node; last = node, node = node->next) { + for (paren_count = 1; node; node = node->next) { if (node->token->type == '(') { paren_count++; @@ -1006,11 +1025,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; - if (paren_count == 0) { - last = node; - node = node->next; + if (paren_count == 0) break; - } } if (node->token->type == ',' && @@ -1031,32 +1047,44 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } } - if (node && paren_count) + if (paren_count) return FUNCTION_UNBALANCED_PARENTHESES; - *node_ret = last; + *last = node; return FUNCTION_STATUS_SUCCESS; } -/* Appends expansion of *node (consuming further tokens from the list - * as necessary) onto result. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +/* This is a helper function that's essentially part of the + * implementation of _glcpp_parser_expand_node. It shouldn't be called + * except for by that function. + * + * Returns NULL if node is a simple token with no expansion, (that is, + * although 'node' corresponds to an identifier defined as a + * function-like macro, it is not followed with a parenthesized + * argument list). + * + * Compute the complete expansion of node (which is a function-like + * macro) and subsequent nodes which are arguments. + * + * Returns the token list that results from the expansion and sets + * *last to the last node in the list that was consumed by the + * expansion. Specificallty, *last will be set as follows: as the + * token of the closing right parenthesis. + */ +static token_list_t * +_glcpp_parser_expand_function (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) + { macro_t *macro; - token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; token_list_t *substituted; int parameter_index; - node = *node_ret; identifier = node->token->value.str; macro = hash_table_find (parser->defines, identifier); @@ -1064,23 +1092,20 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, assert (macro->is_function); arguments = _argument_list_create (parser); - status = _arguments_parse (arguments, node_ret); + status = _arguments_parse (arguments, node, last); switch (status) { case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - _token_list_append (result, node->token); - return; + return NULL; case FUNCTION_UNBALANCED_PARENTHESES: - fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", - identifier); - exit (1); + return NULL; } if (macro->replacements == NULL) { talloc_free (arguments); - return; + return _token_list_create (parser); } if (! ((_argument_list_length (arguments) == @@ -1094,7 +1119,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return; + return NULL; } /* Perform argument substitution on the replacement list. */ @@ -1114,9 +1139,9 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, * tokens, or append a placeholder token for * an empty argument. */ if (argument->head) { - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + _glcpp_parser_expand_token_list (parser, + argument); + _token_list_append_list (substituted, argument); } else { token_t *new_token; @@ -1158,7 +1183,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return; + return NULL; } _token_paste (node->token, next_non_space->token); @@ -1168,22 +1193,33 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, substituted, result); + _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); - talloc_free (arguments); + return substituted; } - -/* Appends the expansion of the token in *node onto result. - * Upon return *node will be the last consumed node, such that further - * processing can continue with node->next. */ -static void -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_node_t **node, - token_list_t *result) +/* Compute the complete expansion of node, (and subsequent nodes after + * 'node' in the case that 'node' is a function-like macro and + * subsequent nodes are arguments). + * + * Returns NULL if node is a simple token with no expansion. + * + * Otherwise, returns the token list that results from the expansion + * and sets *last to the last node in the list that was consumed by + * the expansion. Specificallty, *last will be set as follows: + * + * As 'node' in the case of object-like macro expansion. + * + * As the token of the closing right parenthesis in the case of + * function-like macro expansion. + */ +static token_list_t * +_glcpp_parser_expand_node (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) { - token_t *token = (*node)->token; + token_t *token = node->token; const char *identifier; macro_t *macro; token_list_t *expansion; @@ -1194,90 +1230,134 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, * it being mistaken for an argument separator * later. */ if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); + token->type = COMMA_FINAL; + token->value.ival = COMMA_FINAL; } - return; + + return NULL; } /* Look up this identifier in the hash table. */ identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return; - } + /* Not a macro, so no expansion needed. */ + if (macro == NULL) + return NULL; /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { + if (_string_list_contains (parser->active, identifier, NULL)) { /* We change the token type here from IDENTIFIER to * OTHER to prevent any future expansion of this * unexpanded token. */ char *str; - token_t *new_token; + token_list_t *expansion; + token_t *final; - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return; + str = xtalloc_strdup (parser, token->value.str); + final = _token_create_str (parser, OTHER, str); + expansion = _token_list_create (parser); + _token_list_append (expansion, final); + *last = node; + return expansion; } - if (macro->is_function) { - _glcpp_parser_expand_function_onto (parser, node, result); - } else { + if (! macro->is_function) + { + *last = node; + + if (macro->replacements == NULL) + return _token_list_create (parser); + + expansion = _token_list_copy (parser, macro->replacements); + _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); + _glcpp_parser_expand_token_list (parser, expansion); _string_list_pop (parser->active); + + return expansion; } + + return _glcpp_parser_expand_function (parser, node, last); } +/* Walk over the token list replacing nodes with their expansion. + * Whenever nodes are expanded the walking will walk over the new + * nodes, continuing to expand as necessary. The results are placed in + * 'list' itself; + */ static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result) +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list) { - token_node_t *node; + token_node_t *node_prev; + token_node_t *node, *last; + token_list_t *expansion; - if (list == NULL || list->head == NULL) + if (list == NULL) return; - for (node = list->head; node; node = node->next) - { - _glcpp_parser_expand_token_onto (parser, &node, result); + _token_list_trim_trailing_space (list); + + node_prev = NULL; + node = list->head; + + while (node) { + /* Find the expansion for node, which will replace all + * nodes from node to last, inclusive. */ + expansion = _glcpp_parser_expand_node (parser, node, &last); + if (expansion) { + /* Splice expansion into list, supporting a + * simple deletion if the expansion is + * empty. */ + if (expansion->head) { + if (node_prev) + node_prev->next = expansion->head; + else + list->head = expansion->head; + expansion->tail->next = last->next; + if (last == list->tail) + list->tail = expansion->tail; + } else { + if (node_prev) + node_prev->next = last->next; + else + list->head = last->next; + if (last == list->tail) + list->tail == NULL; + } + } else { + node_prev = node; + } + node = node_prev ? node_prev->next : list->head; } + + list->non_space_tail = list->tail; +} + +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result) +{ + _glcpp_parser_expand_token_list (parser, list); + + _token_list_append_list (result, list); } void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { - token_list_t *expanded; - token_node_t *node; - function_status_t function_status; - if (list == NULL) return; - expanded = _token_list_create (parser); - - _glcpp_parser_expand_token_list_onto (parser, list, expanded); - - _token_list_trim_trailing_space (expanded); + _glcpp_parser_expand_token_list (parser, list); - _token_list_print (expanded); + _token_list_trim_trailing_space (list); - talloc_free (expanded); + _token_list_print (list); } void diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c b/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 00000000000..ed80ea879ce --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define double(a) a*2 +#define foo double( +foo 5) diff --git a/tests/061-define-chain-obj-to-func-multi.c b/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 00000000000..6dbfd1f62d1 --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) -- cgit v1.2.3 From 631016946ca8134244c4e58bef6863d204b1119b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:07:24 -0700 Subject: Fix pass-through of '=' and add a test for it. Previously '=' was not included in our PUNCTUATION regeular expression, but it *was* excldued from our OTHER regular expression, so we were getting the default (and hamful) lex action of just printing it. The test we add here is named "punctuator" with the idea that we can extend it as needed for other punctuator testing. --- glcpp-lex.l | 2 +- glcpp-parse.y | 1 + tests/071-punctuator.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/071-punctuator.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 70d47d24975..52269c6b306 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -38,7 +38,7 @@ NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? diff --git a/glcpp-parse.y b/glcpp-parse.y index c89d7bf159c..01ca08ec740 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -426,6 +426,7 @@ operator: | OR { $$ = OR; } | ';' { $$ = ';'; } | ',' { $$ = ','; } +| '=' { $$ = '='; } | PASTE { $$ = PASTE; } | DEFINED { $$ = DEFINED; } ; diff --git a/tests/071-punctuator.c b/tests/071-punctuator.c new file mode 100644 index 00000000000..959d6825988 --- /dev/null +++ b/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b -- cgit v1.2.3 From b06096e86eda1257769156523b5738044c6a2b10 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:54:19 -0700 Subject: Add test and fix bugs with multiple token-pasting on the same line. The list replacement when token pasting was broken, (failing to properly update the list's tail pointer). Also, memory management when pasting was broken, (modifying the original token's string which would cause problems with multiple calls to a macro which pasted a literal string). We didn't catch this with previous tests because they only pasted argument values. --- glcpp-parse.y | 92 +++++++++++++++++-------------------- tests/072-token-pasting-same-line.c | 2 + 2 files changed, 43 insertions(+), 51 deletions(-) create mode 100644 tests/072-token-pasting-same-line.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 01ca08ec740..f4c834e038f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -783,73 +783,53 @@ _token_print (token_t *token) } } -/* Change 'token' into a new token formed by pasting 'other'. */ -static void +/* Return a new token (talloc()ed off of 'token') formed by pasting + * 'token' and 'other'. Note that this function may return 'token' or + * 'other' directly rather than allocating anything new. + * + * Caution: Only very cursory error-checking is performed to see if + * the final result is a valid single token. */ +static token_t * _token_paste (token_t *token, token_t *other) { /* Pasting a placeholder onto anything makes no change. */ if (other->type == PLACEHOLDER) - return; + return token; - /* When 'token' is a placeholder, just return contents of 'other'. */ - if (token->type == PLACEHOLDER) { - token->type = other->type; - token->value = other->value; - return; - } + /* When 'token' is a placeholder, just return 'other'. */ + if (token->type == PLACEHOLDER) + return other; /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { case '<': - if (other->type == '<') { - token->type = LEFT_SHIFT; - token->value.ival = LEFT_SHIFT; - return; - } else if (other->type == '=') { - token->type = LESS_OR_EQUAL; - token->value.ival = LESS_OR_EQUAL; - return; - } + if (other->type == '<') + return _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL); break; case '>': - if (other->type == '>') { - token->type = RIGHT_SHIFT; - token->value.ival = RIGHT_SHIFT; - return; - } else if (other->type == '=') { - token->type = GREATER_OR_EQUAL; - token->value.ival = GREATER_OR_EQUAL; - return; - } + if (other->type == '>') + return _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL); break; case '=': - if (other->type == '=') { - token->type = EQUAL; - token->value.ival = EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, EQUAL, EQUAL); break; case '!': - if (other->type == '=') { - token->type = NOT_EQUAL; - token->value.ival = NOT_EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, NOT_EQUAL, NOT_EQUAL); break; case '&': - if (other->type == '&') { - token->type = AND; - token->value.ival = AND; - return; - } + if (other->type == '&') + return _token_create_ival (token, AND, AND); break; case '|': - if (other->type == '|') { - token->type = OR; - token->value.ival = OR; - return; - } + if (other->type == '|') + return _token_create_ival (token, OR, OR); break; } @@ -864,9 +844,11 @@ _token_paste (token_t *token, token_t *other) if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { - token->value.str = talloc_strdup_append (token->value.str, - other->value.str); - return; + char *str; + + str = xtalloc_asprintf (token, "%s%s", + token->value.str, other->value.str); + return _token_create_str (token, token->type, str); } printf ("Error: Pasting \""); @@ -874,6 +856,8 @@ _token_paste (token_t *token, token_t *other) printf ("\" and \""); _token_print (other); printf ("\" does not give a valid preprocessing token.\n"); + + return token; } static void @@ -1159,6 +1143,8 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, /* After argument substitution, and before further expansion * below, implement token pasting. */ + _token_list_trim_trailing_space (substituted); + node = substituted->head; while (node) { @@ -1187,12 +1173,16 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, return NULL; } - _token_paste (node->token, next_non_space->token); + node->token = _token_paste (node->token, next_non_space->token); node->next = next_non_space->next; + if (next_non_space == substituted->tail) + substituted->tail = node; node = node->next; } + substituted->non_space_tail = substituted->tail; + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); diff --git a/tests/072-token-pasting-same-line.c b/tests/072-token-pasting-same-line.c new file mode 100644 index 00000000000..e421e9d5e29 --- /dev/null +++ b/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) -- cgit v1.2.3 From 75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:57:22 -0700 Subject: Add killer test case from the C99 specification. Happily, this passes now, (since many of the previously added test cases were extracted from this one). --- tests/099-c99-example.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/099-c99-example.c diff --git a/tests/099-c99-example.c b/tests/099-c99-example.c new file mode 100644 index 00000000000..d1976b1f265 --- /dev/null +++ b/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; -- cgit v1.2.3