Optimizer Project
Compiler Project
COP-5621
Table of Contents
1. Overview
In this project you will implement an interpreter for the While language using the ANTLR visitor of the While grammar from the Lexer project. Refer to While's semantics for specifics on its behavior.
2. Instructions
Create a python program called
~/cop5621spring25/compiler/compiler/Optimize.py
Optimize.py will take SimpleIR as input and output an equivalent, optimized SimpleIR program.
Implement control-flow analysis first (you can use python's networkx to make this easier).
Then implement one analysis and one optimization of your choice, e.g., reaching definitions and constant propagation, live variables and dead code elimination, etc. Or you can implement more sophisticated optimizations, like loop code motion or basic block elimination and merging.
Below is some starter code that will collect a list of instructions from a parsed SimpleIR program. It also contains a skeleton method for control-flow analysis that uses networkx to create a graph as well as some helper and illustration code for working with a networkx graph.
import os import sys sys.path.append('./') import itertools from collections import defaultdict from antlr4 import * from simpleir.SimpleIRLexer import SimpleIRLexer from simpleir.SimpleIRParser import SimpleIRParser from simpleir.SimpleIRListener import SimpleIRListener import logging logging.basicConfig(level=logging.DEBUG) import networkx as nx class IRList(SimpleIRListener): def __init__(self): self.instr = [] # Enter a parse tree produced by SimpleIRParser#unit. def enterUnit(self, ctx:SimpleIRParser.UnitContext): pass # Enter a parse tree produced by SimpleIRParser#function. def enterFunction(self, ctx:SimpleIRParser.FunctionContext): pass # Enter a parse tree produced by SimpleIRParser#localVariables. def enterLocalVariables(self, ctx:SimpleIRParser.LocalVariablesContext): pass # Enter a parse tree produced by SimpleIRParser#parameters. def enterParameters(self, ctx:SimpleIRParser.ParametersContext): pass # Enter a parse tree produced by SimpleIRParser#statements. def enterStatements(self, ctx:SimpleIRParser.StatementsContext): pass # Enter a parse tree produced by SimpleIRParser#returnStatement. def enterReturnStatement(self, ctx:SimpleIRParser.ReturnStatementContext): pass # Enter a parse tree produced by SimpleIRParser#end. def enterEnd(self, ctx:SimpleIRParser.EndContext): pass # Enter a parse tree produced by SimpleIRParser#statement. def enterStatement(self, ctx:SimpleIRParser.StatementContext): pass # Enter a parse tree produced by SimpleIRParser#operation. def enterOperation(self, ctx:SimpleIRParser.OperationContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#assign. def enterAssign(self, ctx:SimpleIRParser.AssignContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#dereference. def enterDereference(self, ctx:SimpleIRParser.DereferenceContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#reference. def enterReference(self, ctx:SimpleIRParser.ReferenceContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#assignDereference. def enterAssignDereference(self, ctx:SimpleIRParser.AssignDereferenceContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#call. def enterCall(self, ctx:SimpleIRParser.CallContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#label. def enterLabel(self, ctx:SimpleIRParser.LabelContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#gotoStatement. def enterGotoStatement(self, ctx:SimpleIRParser.GotoStatementContext): self.instr.append(ctx) # Enter a parse tree produced by SimpleIRParser#ifGoto. def enterIfGoto(self, ctx:SimpleIRParser.IfGotoContext): self.instr.append(ctx) def visualize_graph(cfg, filename="graph.png"): # visualize graph A = nx.nx_agraph.to_agraph(cfg) # A.layout() A.draw(filename, prog="dot") def textualize_graph(cfg): text = "Control Flow Graph\n" for node in sorted(cfg.nodes): text += f'BB{node}' text += str(", ".join([ f'->BB{x}{cfg[node][x]}' for x in cfg[node].keys() ])) text += "\n" text += "\n".join([ str(instr) for instr in cfg.nodes[node]["instrs"] ]) text += "\n" if len(cfg.nodes[node]["instrs"]) > 0 else "" text += "\n" return text def controlflow(instrs): """Takes a list of while3addr instructions and produces a control-flow graph as a networkx digraph""" assert len(instrs) > 0 # assume program has at least one instruction leaders = set() leaders.add(0) # 0 is always the beginning of the program and the first leader exitblock = len(instrs) leaders.add(exitblock) # add a leader for the exit block # bbranges = [ (sortedleaders[x], sortedleaders[x + 1]) for x in range(0, len(sortedleaders) - 1) ] # # collect basic blocks, mapping their leaders to their lists of instructions # bbinstrs = {} # map leader to instructions # bbnums = {} # bbnum = 1 # # add exit block # bbinstrs[exitblock] = [] # bbnums[exitblock] = bbnum # # collect edges and remove gotos since edges replace pc # edges = [] # nodes = [ (bbnums[leader], {'instrs': bbinstrs[leader]}) for leader in bbinstrs.keys() ] # # add entry block # nodes.append((0, {'instrs': []})) # edges.append((0, 1)) # logging.debug(f'{nodes}') # logging.debug(f'{edges}') cfg = nx.DiGraph() # cfg.add_nodes_from(nodes) # cfg.add_edges_from(edges) return cfg def reversecfg(cfg): """Reverse the graph edges and block instructions for backwards analysis.""" cfg = cfg.reverse() for node in cfg.nodes: cfg.nodes[node]['instrs'] = list(reversed(cfg.nodes[node]['instrs'])) return cfg def dataflow(cfg): """Perform a data-flow analysis on a cfg.""" # # initialize state # for node in cfg.nodes: # cfg.nodes[node]['instate'] = set() # cfg.nodes[node]['outstate'] = set() # cfg.nodes[node]['analysis'] = None # cfg.nodes[entry_node]['instate'] = initial_state # worklist = deque(cfg.nodes) # while len(worklist) > 0: # node = worklist.popleft() # # logging.debug(f'iteration: {node}') # cfg.nodes[node]['analysis'], cfg.nodes[node]['outstate'] = analyzeblock(cfg.nodes[node]['instate'], cfg.nodes[node]['instrs'], transfer) # for succ in cfg.successors(node): # newinput = join(cfg.nodes[succ]['instate'], cfg.nodes[node]['outstate']) # if newinput != cfg.nodes[succ]['instate']: # cfg.nodes[succ]['instate'] = newinput # worklist.append(succ) return cfg if len(sys.argv) > 1: # create a mapping from filenames to their input streams filepaths = sys.argv[1:] streams = { os.path.basename(filepath): FileStream(filepath) for filepath in filepaths } else: # create a single mapping from stdin to the input stream input_stream = StdinStream() filename = "stdin" streams = { filename: input_stream } # generate an interpreter for each function by running the listener lexer = SimpleIRLexer(input_stream) stream = CommonTokenStream(lexer) parser = SimpleIRParser(stream) tree = parser.unit() if parser.getNumberOfSyntaxErrors() > 0: print("syntax errors") exit(1) walker = ParseTreeWalker() irl = IRList() walker.walk(irl, tree) print(irl.instr)
3. An algorithm for CFG construction
- Create entry and exit nodes (used for optimization later)
- Collect the leaders of the basic blocks, i.e., each line that starts a basic block, and creating a new node for each
- create edges between basic blocks, looking at each basic block's ending goto/ifgoto and create an edge to the target leader
How do we know when something is a leader, i.e., the first line of a basic block?
- First line of the program
- Target of a goto/ifgoto
- Line after a goto/ifgoto