GOALS OF DECLARATION CHECKING Check that: 1. Each declaration has a unique name in its potential scope (block) - no duplicate declarations 2. Each use of an identifier is declared - no uses of undeclared identifiers SYMBOL TABLE def: a *symbol table* is a mapping from identifiers to attributes Data Structures: a hash table, an array, or a binary search tree ATTRIBUTES def: an *attribute* is information kept about a name by the compiler Examples of attributes: - type or size - kind of name (const, var, procedure) - offset in AR - source code location OPERATIONS FOR SYMBOL TABLES void initialize(); int size(); bool full(); bool declared(name); id_use *lookup(name); bool declared_in_current_scope(name); void insert(name, id_attrs); void enter_scope(); void leave_scope(); SYMBOL TABLES AND POTENTIAL SCOPES def: a declaration's *scope* is the area of the program's text where the declaration has effect def: a *potential scope* is an area of a program's text where declarations could have scope (blocks in PL/0) Grammar of a PL/0 program <program> ::= <block> . <proc-decls> ::= { <proc-decl> } <proc-decl> ::= procedure <ident> ; <block> ; Example: procedure p; var x; x := 2; begin x := 1; # is this use declared? call p end. Example: What's the final value of x? var x; procedure p; var x; x := 2; procedure q; x := 3; begin x := 1; call q; call p; end. STRATEGIES FOR NESTED SCOPES Needs: Approaches: - [Active Deletion] track where each name was added, and remove those names when leaving the scope - [Stack-Based] keep a stack of mappings, one for each scope (recommended) - [Functional] - [Decorations in AST] GOALS OF DECLARATION CHECKING Check that: 1. Each declaration has a unique name (in its potential scope) 2. 
Each use of an identifier is for a name that has already been declared (in a surrounding scope) PROCESS OF DECLARATION CHECKING In each potential scope: enter a new mapping when the tree enter the block add each declared identifier and its attributes checking for duplicate declarations leave it when finished with the block In each statement, expression: check each identifier use to see that the name was declared EXAMPLE: FLOAT LANGUAGE See: http://www.cs.ucf.edu/~leavens/COP3402/ example-code/index.html#FloatCalc ASTs for the FLOAT language (1/3) // file ast.h typedef struct { file_location *file_loc; void *next; // for lists } generic_t; // empty ::= typedef struct { file_location *file_loc; } empty_t; // ident typedef struct ident_s { file_location *file_loc; struct ident_s *next; // for lists const char *name; } ident_t; // floating point numbers typedef struct { file_location *file_loc; const char *text; word_type value; } number_t; // tokens as ASTs typedef struct { file_location *file_loc; const char *text; int code; } token_t; // kinds of expressions typedef enum { expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // op ::= == | != | < | <= | + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr op expr | ident | number | ! expr typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; // ... 
ASTs for the FLOAT language (2/3) // file ast.h (continued) // idents typedef struct { file_location *file_loc; ident_t *idents; } idents_t; // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType idents typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // var-decls ::= { varDecl } typedef struct { file_location *file_loc; var_decl_t *var_decls; } var_decls_t; // kinds of statements typedef enum { assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmt } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ifStmt ::= if expr stmt typedef struct { file_location *file_loc; expr_t expr; struct stmt_s *body; } if_stmt_t; // readStmt ::= read ident typedef struct { file_location *file_loc; const char *name; } read_stmt_t; // writeStmt ::= write expr typedef struct { file_location *file_loc; expr_t expr; } write_stmt_t; // stmt ::= assignStmt | beginStmt // | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; ASTs for the FLOAT language (3/3) // file ast.h // ... 
// program ::= varDecls stmt typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; // The AST type used by bison typedef union AST_u { generic_t generic; var_decls_t var_decls; var_decl_t var_decl; idents_t idents; stmt_t stmt; assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; stmts_t stmts; expr_t expr; binary_op_expr_t binary_op_expr; token_t token; number_t number; ident_t ident; empty_t empty; } AST; // file parser_types.h // ... typedef AST YYSTYPE; // ... THE TYPE ID_ATTRS FOR ATTRIBUTES // file id_attrs.h // attributes of identifiers typedef struct { file_location file_loc; var_type_e var_type; // num. of variable decls before // this one in this scope unsigned int offset_count; } id_attrs; // Allocate and return an id_attrs struct // with field file_loc set to floc, // var_type set to vt, // and offset_count set to ofst_cnt. // This should never return NULL. extern id_attrs *id_attrs_loc_create( file_location floc, var_type_e vt, unsigned int ofst_cnt); SYMBOL TABLE API // file symtab.h // Maximum nesting of potential scopes #define MAX_NESTING 100 // initialize the symbol table extern void symtab_initialize(); // Return the number of scopes // currently in the symbol table. extern unsigned int symtab_size(); // Return the current scope's // count of variables declared extern unsigned int symtab_scope_loc_count(); // Return the current scope's size // (the number of declared ids). extern unsigned int symtab_scope_size(); // Is the current scope full? extern bool symtab_scope_full(); // Return the current nesting level // (num. of symtab_enter_scope() calls // - num. of symtab_leave_scope() calls extern unsigned int symtab_current_nesting_level(); // Is the symbol table itself full? extern bool symtab_full(); // Is name declared? 
// (this looks back through all scopes) extern bool symtab_declared(const char *name); // Is name declared in the current scope? // (this only looks in the current scope) extern bool symtab_declared_in_current_scope( const char *name); // Requires: attrs != NULL && // !symtab_declared_in_current_scope(name) // Add an association from the given name // to the given attributes extern void symtab_insert( const char *name, id_attrs *attrs); // Requires: !symtab_full() // Start a new scope (for a block) extern void symtab_enter_scope(); // Requires: !symtab_empty() extern void symtab_leave_scope(); // If name is declared, return // an id_use pointer for it, otherwise // return NULL if name isn't declared extern id_use *symtab_lookup( const char *name); THE TYPE ID_USE // file id_use.h // An id_use struct gives information from // looking up name in the symbol table: // the attributes (id_attrs *), // and the number of lexical levels out // from the current scope, // where the name was declared. typedef struct { id_attrs *attrs; unsigned int levelsOutward; } id_use; // Requires: attrs != NULL // Allocate and return an id_use struct // containing attrs and levelsOut. extern id_use *id_use_create( id_attrs *attrs, unsigned int levelsOut); SCOPE MAPPING API // file scope.h // Maximum number of declarations // that can be stored in a scope #define MAX_SCOPE_SIZE 4096 typedef struct { const char *id; id_attrs *attrs; } scope_assoc_t; // Invariant: 0 <= size < MAX_SCOPE_SIZE; typedef struct scope_s { // num. of associations in this scope unsigned int size; unsigned int loc_count; scope_assoc_t *entries[MAX_SCOPE_SIZE]; } scope_t; // Allocate and return a fresh scope_t // (Bail with an error if no space) extern scope_t *scope_create(); // The num. 
of non-procedure // declarations that have been added to s extern unsigned int scope_loc_count( scope_t *s); // Return the number of declared identifier // associations in s extern unsigned int scope_size(scope_t *s); // Is the scope s full? extern bool scope_full(scope_t *s); // Is the given name declared in s? extern bool scope_declared(scope_t *s, const char *name); // Requires: attrs != NULL && // !scope_declared(s, name); // Add an association from name to attrs, // store the next_loc_offset value into // attrs->loc_offset, then increase // the next_loc_offset for s by 1. extern void scope_insert(scope_t *s, const char *name, id_attrs *attrs); // Return (a pointer to) the attributes // of the given name in s // or NULL if name is not declared in s extern id_attrs *scope_lookup(scope_t *s, const char *name); BUILDING THE SYMBOL TABLE Abstract Syntax of FLOAT: program ::= varDecls stmt // in file ast.h typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; WALKING THE ASTS: UNPARSER VS. SCOPE_CHECK // file unparser.c // ... #include "ast.h" // ... // Unparse the given program AST // with output going to the file out void unparseProgram(FILE *out, program_t prog) { unparseVarDecls(out, prog.var_decls, 0); unparseStmt(out, prog.stmt, 0); } // In file scope_check.c // ... #include "ast.h" #include "symtab.h" // ... // Build the symbol table for prog // and check for duplicate declarations // or uses of undeclared identifiers void scope_check_program(program_t prog) { symtab_enter_scope(); scope_check_varDecls(prog.var_decls); scope_check_stmt(prog.stmt); symtab_leave_scope(); } LISTS OF VARIABLE DECLARATIONS // file unparser.c // ... 
// Unparse the var_decls_t vds to out // with the given nesting level // (note that if the list is empty, // then nothing is printed) void unparseVarDecls(FILE *out, var_decls_t vds, int level) { var_decl_t *vdp = vds.var_decls; while (vdp != NULL) { unparseVarDecl(out, *vdp, level); vdp = vdp->next; } } // file scope_check.c // build the symbol table // and check the declarations in vds void scope_check_varDecls(var_decls_t vds) { var_decl_t *vdp = vds.var_decls; while (vdp != NULL) { scope_check_varDecl(*vdp); vdp = vdp->next; } } INDIVIDUAL VARIABLE DECLARATIONS (1/2) // Abstract syntax: // varDecl ::= varType idents // varType ::= float | bool // file ast.h // ... // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType idents typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // file unparser.c // ... // Unparse a single var-decl, vd, to out, // indented for the given nesting level void unparseVarDecl(FILE *out, var_decl_t vd, int level) { indent(out, level); fprintf(out, "%s ", vt2str(vd.var_type)); unparseIdents(out, vd.idents); semiAndNewline(out); } // Add declarations for the names in vd, // reporting duplicate declarations void scope_check_varDecl(var_decl_t vd) { var_type_e vt = vd.var_type; scope_check_idents(vd.idents, vt); } INDIVIDUAL VARIABLE DECLARATIONS (2/2) // file unparser.c // ... // unparse the identifiers in idents, // separated by commas, to out void unparseIdents(FILE *out, idents_t idents) { ident_t *idp = idents.idents; bool printed_any = false; while (idp != NULL) { if (printed_any) { fprintf(out, ", "); } unparseIdent(out, *idp); printed_any = true; idp = idp->next; } } // in scope_check.c // ... 
// Add declarations for the names in ids // to current scope as type vt // reporting any duplicate declarations void scope_check_idents(idents_t ids, var_type_e vt) { ident_t *idp = ids.idents; while (idp != NULL) { scope_check_declare_ident( *idp, vt); idp = idp->next; } } DECLARING A SINGLE VARIABLE // file unparser.c // ... // Unparse the given identifier to out void unparseIdent(FILE *out, ident_t id) { fprintf(out, "%s", id.name); } // file scope_check.c // ... // Add declaration for id // to current scope as type vt // reporting if it's a duplicate declaration void scope_check_declare_ident( ident_t id, var_type_e vt) { if(symtab_declared_in_current_scope( id.name)) { bail_with_prog_error(*(id.file_loc), "Variable \"%s\" has already been declared!", id.name); } else { int ofst_cnt = symtab_scope_loc_count(); id_attrs *attrs = id_attrs_loc_create( *(id.file_loc), vt, ofst_cnt); symtab_insert(id.name, attrs); } } CHECKING FOR UNDECLARED IDENTIFIERS Where would they be? - in statements - in expressions (inside statements) So need to: - walk the AST in each statement, walk the AST, look for undeclared identifier uses and in each subexpression walk the AST and look for undeclared identifier uses CHECKING STATEMENTS FOR UNDECLARED IDs // file ast.h // ... // kinds of statements typedef enum {assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmt } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ... 
// stmt ::= assignStmt | beginStmt | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; DECLARATION CHECKING OF STATEMENTS (1/2) // file unparser.c // ... // Unparse stmt to out, // indented for the given level. void unparseStmt(FILE *out, stmt_t stmt, int indentLevel) { switch (stmt.stmt_kind) { case assign_stmt: unparseAssignStmt(out, stmt.data.assign_stmt, indentLevel); break; case begin_stmt: unparseBeginStmt(out, stmt.data.begin_stmt, indentLevel); break; case if_stmt: // ... } } DECLARATION CHECKING OF STATEMENTS (2/2) // check the statement to make sure that // all identifiers used have been declared // (if not, then produce an error) void scope_check_stmt(stmt_t stmt) { switch (stmt.stmt_kind) { case assign_stmt: scope_check_assignStmt(stmt.data.assign_stmt); break; case begin_stmt: scope_check_beginStmt(stmt.data.begin_stmt); break; case if_stmt: // ... } } DECLARATION CHECKING ASSIGNMENT STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseAssignStmt(FILE *out, assign_stmt_t stmt, int level) { indent(out, level); fprintf(out, "%s", stmt.name); fprintf(out, " = "); unparseExpr(out, *(stmt.expr)); semiAndNewline(out); } // check the statement for // undeclared identifiers void scope_check_assignStmt( assign_stmt_t stmt) { id_use * idu = scope_check_ident_declared(*(stmt.file_loc), stmt.name); assert(idu != NULL); scope_check_expr(*(stmt.expr)); } CHECKING AN INDIVIDUAL IDENTIFIER // check that name has been declared, // if so, then return an id_use for it // otherwise, produce an error id_use *scope_check_ident_declared( file_location floc, const char *name) { id_use *ret = symtab_lookup(name); if (ret == NULL) { // no declaration! 
bail_with_prog_error( floc, "identifier \"%s\" is not declared!", name); } return ret; } DECLARATION CHECKING OF BEGIN STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseBeginStmt(FILE *out, begin_stmt_t stmt, int level) { indent(out, level); fprintf(out, "{\n"); unparseVarDecls(out, stmt.var_decls, level+1); unparseStmts(out, stmt.stmts,level+1); indent(out, level); fprintf(out, "}\n"); } // file scope_check.c // ... // check the statement for // duplicate declarations and for // undeclared identifiers void scope_check_beginStmt(begin_stmt_t stmt) { symtab_enter_scope(); scope_check_varDecls(stmt.var_decls); scope_check_stmts(stmt.stmts); symtab_leave_scope(); } DECLARATION CHECKING EXPRESSIONS (1/3) // file ast.h // ... // kinds of expressions typedef enum {expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // op ::= == | != | < | <= | + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr op expr | ident | number typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; DECLARATION CHECKING EXPRESSIONS (2/3) // file unparser.c // ... // Unparse the expression exp to out // adding parentheses to indicate nesting void unparseExpr(FILE *out, expr_t exp) { switch (exp.expr_kind) { case expr_bin_op: unparseBinaryOpExpr(out, exp.data.binary); break; case expr_ident: unparseIdent(out, exp.data.ident); break; case expr_number: unparseNumber(out, exp.data.number); break; case expr_logical_not: unparseLogicalNotExpr(out, *(exp.data.logical_not)); break; default: bail_with_error(/* ... */); break; } } DECLARATION CHECKING EXPRESSIONS (3/3) // file scope_check.c // ... 
// check the expression to make sure that // all identifiers used have been declared // (if not, then produce an error) void scope_check_expr(expr_t exp) { switch (exp.expr_kind) { case expr_bin_op: scope_check_binary_op_expr(exp.data.binary); break; case expr_ident: scope_check_ident_expr(exp.data.ident); break; case expr_number: // nothing to do, as no identifier uses possible break; case expr_logical_not: scope_check_expr(*(exp.data.logical_not)); break; default: bail_with_error(/* ... */); break; } } DECLARATION CHECKING FOR BINARY OP EXPRs // check that all identifiers used in exp // have been declared // (if not, then produce an error) void scope_check_binary_op_expr(binary_op_expr_t exp) { scope_check_expr(*(exp.expr1)); // (note: no identifiers can occur in the operator) scope_check_expr(*(exp.expr2)); } DECLARATION CHECKING FOR IDENTIFIER EXPRs // check id to make sure that // it has been declared // (if not, then produce an error) void scope_check_ident_expr(ident_t id) { scope_check_ident_declared(*(id.file_loc), id.name); } SUMMARY OF DECLARATION CHECKING Key concepts/data: - Abstract Syntax Trees (ASTs) - Attributes (id_attrs) - Symbol Table (symtab and scope) Algorithm: Walk the AST - call symtab_enter_scope() whenever starting a new potential scope - when walking over (processing) a declaration add a mapping to the symbol table, issue errors for duplicate declarations - when processing a statement/expression check that all identifiers used have been declared - call symtab_leave_scope() when finished with the potential scope