COP 3402 meeting -*- Outline -*- * Declaration Checking Recall the goals of this: ** goals ------------------------------------------ GOALS OF DECLARATION CHECKING Check that: 1. Each declaration has a unique name (in its potential scope) 2. Each use of an identifier is for a name that has already been declared (in a surrounding scope) ------------------------------------------ ... (don't allow duplicate declarations, to avoid any misunderstanding/confusion) ... (can't use undeclared identifiers) (so we can know what each name's attributes are) ** process ------------------------------------------ PROCESS OF DECLARATION CHECKING In each potential scope: In each statement, expression: ------------------------------------------ ... a. Add each declaration's name and attributes, and check for duplicate names in that potential scope b. (Check language rules for declarations, if any) a. Check for undeclared identifier uses b. (Check any implicit uses that are not written) c. (Check for privacy violations, e.g., in C++ check where the use is vs. the declared privacy attributes) *** example using the FLOAT language ------------------------------------------ EXAMPLE: FLOAT LANGUAGE See: http://www.cs.ucf.edu/~leavens/COP3402/ example-code/index.html#FloatCalc ------------------------------------------ This may be useful for HW3 **** ASTs ------------------------------------------ ASTs for the FLOAT language (1/3) // file ast.h typedef struct { file_location *file_loc; void *next; // for lists } generic_t; // empty ::= typedef struct { file_location *file_loc; } empty_t; // ident typedef struct ident_s { file_location *file_loc; struct ident_s *next; // for lists const char *name; } ident_t; // floating point numbers typedef struct { file_location *file_loc; const char *text; word_type value; } number_t; // tokens as ASTs typedef struct { file_location *file_loc; const char *text; int code; } token_t; // tokens as ASTs typedef struct { file_location *file_loc; const char *text; int code; } token_t; // kinds of expressions typedef enum { expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // arithOp ::= + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr arithOp expr | ident | number typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; // ... ------------------------------------------ ------------------------------------------ ASTs for the FLOAT language (2/3) // file ast.h (continued) // idents typedef struct { file_location *file_loc; ident_t *idents; } idents_t; // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType ident typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // var-decls ::= { varDecl } typedef struct { file_location *file_loc; var_decl_t *var_decls; } var_decls_t; // kinds of statements typedef enum { assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmts } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ifStmt ::= if expr stmt typedef struct { file_location *file_loc; expr_t expr; struct stmt_s *body; } if_stmt_t; // readStmt ::= read ident typedef struct { file_location *file_loc; const char *name; } read_stmt_t; // writeStmt ::= write expr typedef struct { file_location *file_loc; expr_t expr; } write_stmt_t; // stmt ::= assignStmt | beginStmt // | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; ------------------------------------------ ------------------------------------------ ASTs for the FLOAT language (3/3) // file ast.h // ... // program ::= varDecls stmt typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; // The AST type used by bison typedef union AST_u { generic_t generic; var_decls_t var_decls; var_decl_t var_decl; idents_t idents; stmt_t stmt; assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; stmts_t stmts; expr_t expr; binary_op_expr_t binary_op_expr; token_t token; number_t number; ident_t ident; empty_t empty; } AST; // file parser_types.h // ... typedef AST YYSTYPE; // ... ------------------------------------------ **** attributes (type id_attrs) ------------------------------------------ THE TYPE ID_ATTRS FOR ATTRIBUTES // file id_attrs.h // attributes of identifiers typedef struct { file_location file_loc; var_type_e var_type; // num. of variable decls before // this one in this scope unsigned int offset_count; } id_attrs; // Allocate and return an id_attrs struct // with field file_loc set to floc, // var_type set to vt, // and offset_count set to ofst_cnt. // This should never return NULL. extern id_attrs *id_attrs_loc_create( file_location floc, var_type_e vt, unsigned int ofst_cnt); ------------------------------------------ **** symbol table The design is a stack of "scope"s, where each scope is a mapping from names to attributes ------------------------------------------ SYMBOL TABLE API // file symtab.h // Maximum nesting of potential scopes #define MAX_NESTING 100 // initialize the symbol table extern void symtab_initialize(); // Return the number of scopes // currently in the symbol table. extern unsigned int symtab_size(); // Return the current scope's // count of variables declared extern unsigned int symtab_scope_loc_count(); // Return the current scope's size // (the number of declared ids). extern unsigned int symtab_scope_size(); // Is the current scope full? extern bool symtab_scope_full(); // Return the current nesting level // (num. of symtab_enter_scope() calls // - num. of symtab_leave_scope() calls extern unsigned int symtab_current_nesting_level(); // Is the symbol table itself full? extern bool symtab_full(); // Is name declared? // (this looks back through all scopes) extern bool symtab_declared(const char *name); // Is name declared in the current scope? // (this only looks in the current scope) extern bool symtab_declared_in_current_scope( const char *name); // Requires: attrs != NULL && // !symtab_declared_in_current_scope(name) // Add an association from the given name // to the given attributes extern void symtab_insert( const char *name, id_attrs *attrs); // Requires: !symtab_full() // Start a new scope (for a block) extern void symtab_enter_scope(); // Requires: !symtab_empty() extern void symtab_leave_scope(); // If name is declared, return // an id_use pointer for it, otherwise // return NULL if name isn't declared extern id_use *symtab_lookup( const char *name); ------------------------------------------ **** identifier uses (type id_use) ------------------------------------------ THE TYPE ID_USE // file id_use.h // An id_use struct gives information from // looking up name in the symbol table: // the attributes (id_attrs *), // and the number of lexical levels out // from the current scope, // where the name was declared. typedef struct { id_attrs *attrs; unsigned int levelsOutward; } id_use; // Requires: attrs != NULL // Allocate and return an id_use struct // containing attrs and levelsOut. extern id_use *id_use_create( id_attrs *attrs, unsigned int levelsOut); ------------------------------------------ **** scope mappings (type scope_t) ------------------------------------------ SCOPE MAPPING API // file scope.h // Maximum number of declarations // that can be stored in a scope #define MAX_SCOPE_SIZE 4096 typedef struct { const char *id; id_attrs *attrs; } scope_assoc_t; // Invariant: 0 <= size < MAX_SCOPE_SIZE; typedef struct scope_s { unsigned int size; // num. of associations in this scope unsigned int loc_count; scope_assoc_t *entries[MAX_SCOPE_SIZE]; } scope_t; // Allocate and return a fresh scope_t // (Bail with an error if no space) extern scope_t *scope_create(); // The num. of non-procedure // declartions that have been added to s extern unsigned int scope_loc_count( scope_t *s); // Return the number of declared identifier // associations in s extern unsigned int scope_size(scope_t *s); // Is the current scope full? extern bool scope_full(scope_t *s); // Is the given name declared in s? extern bool scope_declared(scope_t *s, const char *name); // Requires: attrs != NULL && // !scope_declared(name); // Add an association from name to attrs, // store the next_loc_offset value into // attrs->loc_offset, then increase // the next_loc_offset for s by 1. extern void scope_insert(scope_t *s, const char *name, id_attrs *attrs); // Return (a pointer to) the attributes // of the given name in s // or NULL if name is not declared in s extern id_attrs *scope_lookup(scope_t *s, const char *name); ------------------------------------------ The type scope_t is what is stored on the stack in a symtab I just used a list of associations, stored in an array and linear search. But with this API, it could be implemented in many different ways **** Building the symbol table ***** programs ------------------------------------------ BUILDING THE SYMBOL TABLE Abstract Syntax of FLOAT: program ::= varDecls stmt // in file ast.h typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; // In file scope_check.c // ... #include "ast.h" #include "symtab.h" // ... // Build the symbol table for prog // and check for duplicate declarations // or uses of undeclared identifiers void scope_check_program(program_t prog) { symtab_enter_scope(); scope_check_varDecls(prog.var_decls); scope_check_stmt(prog.stmt); symtab_leave_scope(); } ------------------------------------------ Compare with the ASTs for programs. The basic idea of such a tree walk is always to recursively examine each subtree ------------------------------------------ WALKING THE ASTS: UNPARSER VS. SCOPE_CHECK // file unparser.c // ... #include "ast.h" // ... // Unparse the given program AST // with output going to the file out void unparseProgram(FILE *out, program_t prog) { unparseVarDecls(out, prog.var_decls, 0); unparseStmt(out, prog.stmt, 0); } // In file scope_check.c // ... #include "ast.h" #include "symtab.h" // ... // Build the symbol table for prog // and check for duplicate declarations // or uses of undeclared identifiers void scope_check_program(program_t prog) { symtab_enter_scope(); scope_check_varDecls(prog.var_decls); scope_check_stmt(prog.stmt); symtab_leave_scope(); } ------------------------------------------ Q: Why are both of these similar? They both follow the grammar! (i.e., the grammar of the abstract syntax) program ::= varDecls stmt ***** variable declarations ****** handling the lists ------------------------------------------ LISTS OF VARIABLE DECLARATIONS // file unparser.c // ... // Unparse the var_decls_t vds to out // with the given nesting level // (note that the list is empty, // then nothing is printed) void unparseVarDecls(FILE *out, var_decls_t vds, int level) { var_decl_t *vdp = vds.var_decls; while (vdp != NULL) { unparseVarDecl(out, *vdp, level); vdp = vdp->next; } } // file scope_check.c // build the symbol table // and check the declarations in vds void scope_check_varDecls(var_decls_t vds) { } ------------------------------------------ vds points to the head of the linked list of var_decl_t structs, so need to iterate over that: ... var_decl_t *vdp = vds.var_decls; while (vdp != NULL) { scope_check_varDecl(*vdp); vdp = vdp->next; } ****** handling each variable declaration ------------------------------------------ INDIVIDUAL VARIABLE DECLARATIONS (1/2) // Abstract syntax: // varDecl ::= varType idents // varType ::= float | bool // file ast.h // ... // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType ident typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // file unparser.c // ... // Unparse a single var-decl, vd, to out, // indented for the given nesting level void unparseVarDecl(FILE *out, var_decl_t vd, int level) { indent(out, level); fprintf(out, "%s ", vt2str(vd.var_type)); unparseIdents(out, vd.idents); semiAndNewline(out); } // Add declarations for the names in vd, // reporting duplicate declarations void scope_check_varDecl(var_decl_t vd) { } ------------------------------------------ ... scope_check_idents(vd.idents, vd.var_type); have to decide on the parameters for scope_check_idents, it needs to know the var_type ------------------------------------------ INDIVIDUAL VARIABLE DECLARATIONS (2/2) // file unparser.c // ... // unparse the identififers in idents, // separated by commas, to out void unparseIdents(FILE *out, idents_t idents) { ident_t *idp = idents.idents; bool printed_any = false; while (idp != NULL) { if (printed_any) { fprintf(out, ", "); } unparseIdent(out, *idp); printed_any = true; idp = idp->next; } } // in scope_check.c // ... // Add declarations for the names in ids // to current scope as type vt // reporting any duplicate declarations void scope_check_idents(idents_t ids, var_type_e vt) { } ------------------------------------------ ... ident_t *idp = ids.idents; while (idp != NULL) { scope_check_declare_ident(*idp, vt); idp = idp->next; } ------------------------------------------ DECLARING A SINGLE VARIABLE // file unparser.c // ... // Unparse the given identifier to out void unparseIdent(FILE *out, ident_t id) { fprintf(out, "%s", id.name); } // file scope_check.c // ... // Add declaration for id // to current scope as type vt // reporting if it's a duplicate declaration void scope_check_declare_ident(ident_t id, var_type_e vt) { } ------------------------------------------ ... if (symtab_declared_in_current_scope(id.name)) { // only variables in FLOAT bail_with_prog_error(*(id.file_loc), "Variable \"%s\" has already been declared!", id.name); } else { int ofst_cnt = symtab_scope_loc_count(); id_attrs *attrs = id_attrs_loc_create(*(id.file_loc), vt, ofst_cnt); symtab_insert(id.name, attrs); } **** Checking for undeclared identifiers ------------------------------------------ CHECKING FOR UNDECLARED IDENTIFIERS Where would they be? So need to: ------------------------------------------ ... in expressions, which can only be in statements in FLOAT (some languages might have expressions in declarations also) ... walk over the AST, in each statement, walk the AST, look for undeclared identifier uses (where could those be?) and in each expression, walk the AST and look for undeclared identifier uses ***** undeclared identifiers in statments ****** ASTs for statements ------------------------------------------ CHECKING STATEMENTS FOR UNDECLARED IDs // file ast.h // ... // kinds of statements typedef enum {assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmt } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ... // stmt ::= assignStmt | beginStmt | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; ------------------------------------------ ****** checking statements ------------------------------------------ DECLARATION CHECKING OF STATEMENTS (1/2) // file unparser.c // ... // Unparse stmt to out, // indented for the given level. void unparseStmt(FILE *out, stmt_t stmt, int indentLevel) { switch (stmt.stmt_kind) { case assign_stmt: unparseAssignStmt(out, stmt.data.assign_stmt, indentLevel); break; case begin_stmt: unparseBeginStmt(out, stmt.data.begin_stmt, indentLevel); break; case if_stmt: // ... } } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING OF STATEMENTS (2/2) // check the statement to make sure that // all idenfifiers used have been declared // (if not, then produce an error) void scope_check_stmt(stmt_t stmt) { } ------------------------------------------ ... switch (stmt.stmt_kind) { case assign_stmt: scope_check_assignStmt(stmt.data.assign_stmt); break; case begin_stmt: scope_check_beginStmt(stmt.data.begin_stmt); break; case if_stmt: // ... } Note how this is like the unparser! ------------------------------------------ DECLARATION CHECKING ASSIGNMENT STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseAssignStmt(FILE *out, assign_stmt_t stmt, int level) { indent(out, level); fprintf(out, "%s = ", stmt.name); unparseExpr(out, *(stmt.expr)); semiAndNewline(out); } // check the statement for // undeclared identifiers void scope_check_assignStmt( assign_stmt_t stmt) { } ------------------------------------------ ... const char *name = stmt.name; id_use *idu = scope_check_ident_declared(*(stmt.file_loc), name); assert(idu != NULL); // since would bail if not declared scope_check_expr(*(stmt.expr)); ------------------------------------------ CHECKING AN INDIVIDUAL IDENTIFIER // check that name has been declared, // if so, then return an id_use for it // otherwise, produce an error id_use *scope_check_ident_declared( file_location floc, const char *name) { } ------------------------------------------ ... id_use *ret = symtab_lookup(name); if (ret == NULL) { bail_with_prog_error(floc, "identifier \"%s\" is not declared!", name); } return ret; ------------------------------------------ DECLARATION CHECKING OF BEGIN STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseBeginStmt(FILE *out, begin_stmt_t stmt, int level) { indent(out, level); fprintf(out, "{\n"); unparseVarDecls(out, stmt.var_decls, level+1); unparseStmts(out, stmt.stmts,level+1); indent(out, level); fprintf(out, "}\n"); } // file scope_check.c // ... // check the statement for // duplicate declarations and for // undeclared identifiers void scope_check_beginStmt(begin_stmt_t stmt) { } ------------------------------------------ ... symtab_enter_scope(); scope_check_varDecls(stmt.var_decls); scope_check_stmts(stmt.stmts); symtab_leave_scope(); Q: Does the order matter in the scope checking? Yes, need to do the declarations first! Q: How do we know when to call symtab_enter_scope and symtab_leave_scope? when there are declarations possible in that area, then need to enter a scope when those are no longer visible, then need to leave that scope **** checking expressions ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (1/3) // file ast.h // ... // kinds of expressions typedef enum {expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // op ::= == | != | < | <= | + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr op expr | ident | number typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; ------------------------------------------ ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (2/3) // file unparser.c // ... // Unparse the expression exp to out // adding parentheses to indicate nesting void unparseExpr(FILE *out, expr_t exp) { switch (exp.expr_kind) { case expr_bin_op: unparseBinaryOpExpr(out, exp.data.binary); break; case expr_ident: unparseIdent(out, exp.data.ident); break; case expr_number: unparseNumber(out, exp.data.number); break; case expr_logical_not: unparseLogicalNotExpr(out, *(exp.data.logical_not)); break; default: bail_with_error(/* ... */); break; } } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (3/3) // file scope_check.c // ... // check the expresion to make sure that // all idenfifiers used have been declared // (if not, then produce an error) void scope_check_expr(expr_t exp) { } ------------------------------------------ ... switch (exp.expr_kind) { case expr_bin_op: scope_check_binary_op_expr(exp.data.binary); break; case expr_ident: scope_check_ident_expr(exp.data.ident); break; case expr_number: // no identifiers in this case break; case expr_logical_not: scope_check_expr(*(exp.data.logical_not)); break; default: bail_with_error("Unexpected expr_kind_e (%d) in scope_check_expr", exp.expr_kind); break; } Q: What should be done for numbers? Nothing, becuase there can't be any identifier uses in them Q: What should be done for logical not expressions? Recurse into the subexpression (following the grammar) ------------------------------------------ DECLARATION CHECKING FOR BINARY OP EXPRs // check that all identifiers used in exp // have been declared // (if not, then produce an error) void scope_check_binary_op_expr(binary_op_expr_t exp) { scope_check_expr(*(exp.expr1)); // (note: no identifiers can occur in the operator) scope_check_expr(*(exp.expr2)); } ------------------------------------------ ... scope_check_expr(*(exp.expr1)); // (note: no identifiers can occur in the operator) scope_check_expr(*(exp.expr2)); Q: Is there anything you want look at to see what to do? Possibly the AST for binary_op_expr_t or the unparser... Q: Is there anything that should be done for the operator? No, can't be any identifier uses there. ------------------------------------------ DECLARATION CHECKING FOR IDENTIFIER EXPRs // check id to make sure that // all it has been declared // (if not, then produce an error) void scope_check_ident_expr(ident_t id) { } ------------------------------------------ ... scope_check_ident_declared(*(id.file_loc), id.name); Q: Is there anything you want look at to see what to do? Possibly the AST for ident_t or the unparser... *** Summary of Declaration Checking ------------------------------------------ SUMMARY OF DECLARATION CHECKING Key concepts/data: Algorithm: ------------------------------------------ ... - Abstract Syntax Trees (ASTs) - Attributes (id_attrs) - Symbol Table (symtab) ... Walk the abstract syntax tree - call symtab_enter_scope() when starting a potential scope - call symtab_leave_scope(), when finishing with potential scope