UP | HOME

Optimizer Project
Compiler Project
COP-5621

Table of Contents

1. Overview

In this project you will implement an interpreter for the While language using the ANTLR visitor of the While grammar from the Lexer project. Refer to While's semantics for specifics on its behavior.

2. Instructions

Create a python program called

~/cop5621spring25/compiler/compiler/Optimize.py

Optimize.py will take SimpleIR as input and output an equivalent, optimized SimpleIR program.

Implement control-flow analysis first (you can use python's networkx to make this easier).

Then implement one analysis and one optimization of your choice, e.g., reaching definitions and constant propagation, live variables and dead code elimination, etc. Or you can implement more sophisticated optimizations, like loop code motion or basic block elimination and merging.

Below is some starter code that will collect a list of instructions from a parsed SimpleIR program. It also contains a skeleton method for control-flow analysis that uses networkx to create a graph as well as some helper and illustration code for working with a networkx graph.

import os
import sys
sys.path.append('./')
import itertools
from collections import defaultdict
from antlr4 import *
from simpleir.SimpleIRLexer import SimpleIRLexer
from simpleir.SimpleIRParser import SimpleIRParser
from simpleir.SimpleIRListener import SimpleIRListener
import logging
logging.basicConfig(level=logging.DEBUG)
import networkx as nx

class IRList(SimpleIRListener):

  def __init__(self):
    self.instr = []

  # Enter a parse tree produced by SimpleIRParser#unit.
  def enterUnit(self, ctx:SimpleIRParser.UnitContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#function.
  def enterFunction(self, ctx:SimpleIRParser.FunctionContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#localVariables.
  def enterLocalVariables(self, ctx:SimpleIRParser.LocalVariablesContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#parameters.
  def enterParameters(self, ctx:SimpleIRParser.ParametersContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#statements.
  def enterStatements(self, ctx:SimpleIRParser.StatementsContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#returnStatement.
  def enterReturnStatement(self, ctx:SimpleIRParser.ReturnStatementContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#end.
  def enterEnd(self, ctx:SimpleIRParser.EndContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#statement.
  def enterStatement(self, ctx:SimpleIRParser.StatementContext):
    pass

  # Enter a parse tree produced by SimpleIRParser#operation.
  def enterOperation(self, ctx:SimpleIRParser.OperationContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#assign.
  def enterAssign(self, ctx:SimpleIRParser.AssignContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#dereference.
  def enterDereference(self, ctx:SimpleIRParser.DereferenceContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#reference.
  def enterReference(self, ctx:SimpleIRParser.ReferenceContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#assignDereference.
  def enterAssignDereference(self, ctx:SimpleIRParser.AssignDereferenceContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#call.
  def enterCall(self, ctx:SimpleIRParser.CallContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#label.
  def enterLabel(self, ctx:SimpleIRParser.LabelContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#gotoStatement.
  def enterGotoStatement(self, ctx:SimpleIRParser.GotoStatementContext):
    self.instr.append(ctx)

  # Enter a parse tree produced by SimpleIRParser#ifGoto.
  def enterIfGoto(self, ctx:SimpleIRParser.IfGotoContext):
    self.instr.append(ctx)

def visualize_graph(cfg, filename="graph.png"):
  # visualize graph
  A = nx.nx_agraph.to_agraph(cfg)
  # A.layout()
  A.draw(filename, prog="dot")

def textualize_graph(cfg):
  text = "Control Flow Graph\n"
  for node in sorted(cfg.nodes):
    text += f'BB{node}'
    text += str(", ".join([ f'->BB{x}{cfg[node][x]}' for x in cfg[node].keys() ]))
    text += "\n"
    text += "\n".join([ str(instr) for instr in cfg.nodes[node]["instrs"] ])
    text += "\n" if len(cfg.nodes[node]["instrs"]) > 0 else ""
    text += "\n"
  return text

def controlflow(instrs):
  """Takes a list of while3addr instructions and produces a control-flow graph as a networkx digraph"""
  assert len(instrs) > 0 # assume program has at least one instruction
  leaders = set()
  leaders.add(0) # 0 is always the beginning of the program and the first leader
  exitblock = len(instrs)
  leaders.add(exitblock) # add a leader for the exit block

  # bbranges = [ (sortedleaders[x], sortedleaders[x + 1]) for x in range(0, len(sortedleaders) - 1) ]

  # # collect basic blocks, mapping their leaders to their lists of instructions
  # bbinstrs = {} # map leader to instructions
  # bbnums = {}
  # bbnum = 1

  # # add exit block
  # bbinstrs[exitblock] = []
  # bbnums[exitblock] = bbnum

  # # collect edges and remove gotos since edges replace pc
  # edges = []
  # nodes = [ (bbnums[leader], {'instrs': bbinstrs[leader]}) for leader in bbinstrs.keys() ]

  # # add entry block
  # nodes.append((0, {'instrs': []}))
  # edges.append((0, 1))
  # logging.debug(f'{nodes}')
  # logging.debug(f'{edges}')

  cfg = nx.DiGraph()
  # cfg.add_nodes_from(nodes)
  # cfg.add_edges_from(edges)

  return cfg

def reversecfg(cfg):
  """Reverse the graph edges and block instructions for backwards analysis."""
  cfg = cfg.reverse() 
  for node in cfg.nodes:
    cfg.nodes[node]['instrs'] = list(reversed(cfg.nodes[node]['instrs']))
  return cfg

def dataflow(cfg):
  """Perform a data-flow analysis on a cfg."""

  # # initialize state
  # for node in cfg.nodes:
  #   cfg.nodes[node]['instate'] = set()
  #   cfg.nodes[node]['outstate'] = set()
  #   cfg.nodes[node]['analysis'] = None

  # cfg.nodes[entry_node]['instate'] = initial_state

  # worklist = deque(cfg.nodes)
  # while len(worklist) > 0:
  #   node = worklist.popleft()
  #   # logging.debug(f'iteration: {node}')
  #   cfg.nodes[node]['analysis'], cfg.nodes[node]['outstate'] = analyzeblock(cfg.nodes[node]['instate'], cfg.nodes[node]['instrs'], transfer)
  #   for succ in cfg.successors(node):
  #     newinput = join(cfg.nodes[succ]['instate'], cfg.nodes[node]['outstate'])
  #     if newinput != cfg.nodes[succ]['instate']:
  #       cfg.nodes[succ]['instate'] = newinput
  #       worklist.append(succ)

  return cfg

if len(sys.argv) > 1:
  # create a mapping from filenames to their input streams
  filepaths = sys.argv[1:]
  streams = { os.path.basename(filepath): FileStream(filepath) for filepath in filepaths }
else:
  # create a single mapping from stdin to the input stream
  input_stream = StdinStream()
  filename = "stdin"
  streams = { filename: input_stream }

# generate an interpreter for each function by running the listener
lexer = SimpleIRLexer(input_stream)
stream = CommonTokenStream(lexer)
parser = SimpleIRParser(stream)
tree = parser.unit()
if parser.getNumberOfSyntaxErrors() > 0:
  print("syntax errors")
  exit(1)
walker = ParseTreeWalker()
irl = IRList()
walker.walk(irl, tree)
print(irl.instr)

3. An algorithm for CFG construction

  1. Create entry and exit nodes (used for optimization later)
  2. Collect the leaders of the basic blocks, i.e., each line that starts a basic block, and creating a new node for each
  3. create edges between basic blocks, looking at each basic block's ending goto/ifgoto and create an edge to the target leader

How do we know when something is a leader, i.e., the first line of a basic block?

  1. First line of the program
  2. Target of a goto/ifgoto
  3. Line after a goto/ifgoto

Author: Paul Gazzillo

Created: 2025-03-26 Wed 16:06

Validate