CC S02E06

April 24, 2013, 9:11 p.m.

Соберем воедино накопленные знания о flex, bison и llvm, чтобы реализовать простейший, но уже полноценный компилятор из языка с переменными, выражениями, присваиваниями и функцией print. Наш компилятор будет понимать, например, следующую программу:

a = 1
b = 2 + a
print 2 * a
print 2 * a * b

Все файлы можно скачать единым архивом. Ниже приведены лишь самые важные из них.

compile.sh

Реализуем "компилятор" в виде скрипта, сердцем которого будет наша программа compiler, генерирующая llvm байт-код на выходе.

#!/bin/bash

./compiler $1
llc-mp-3.2 -disable-cfi -mattr=-avx $1.bc
clang $1.s print.c -o $1.elf

Makefile

.PHONY: Build Lexer Parser

Build: Parser Lexer
    clang++ -o compiler `llvm-config-mp-3.2 --cppflags --ldflags --libs`\
                  lexer.cpp parser.cpp driver.cpp compiler.cpp\
                  ast_codegen.cpp ast_tostring.cpp ast.cpp

Lexer: lexer.l
    flex -o lexer.cpp lexer.l

Parser: parser.y
    bison -d -o parser.cpp parser.y

compiler.cpp

#include <iostream>
#include "driver.hpp"
#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"

int main(int argc, const char* argv[])
{
  Driver driver;

  for (++argv; argv[0]; ++argv) {
    if (*argv == std::string ("-p"))
      driver.trace_parsing = true;
    else if (*argv == std::string ("-s"))
      driver.trace_scanning = true;
    else if (!driver.parse(*argv)) {
      std::cout << "; AST: " << *driver.result << std::endl;
      Module* theModule = driver.result->codegen();
      std::string errorString;
      raw_fd_ostream bitcode((std::string(*argv) + ".bc").c_str(), errorString, 0);
      WriteBitcodeToFile(theModule, bitcode);
      bitcode.close();
    }
  }

  return 0;
}

parser.y

%skeleton "lalr1.cc"
%require "2.5"
%defines
%define parser_class_name "Parser"

%code requires {
#include <string>
#include "ast.hpp"
class Driver;
}

%parse-param { Driver& driver }
%lex-param   { Driver& driver }

%debug
%error-verbose

%union {
    double dval;
    std::string* sval;
    ASTNode* ast;
    NumberASTNode* num;
    VariableASTNode* var;
    BlockASTNode* block;
    BinaryOpASTNode* binOp;
    UnitASTNode* unit;
}

%code {
#include "driver.hpp"
}

%token END 0 "end of file"
%token PRINT
%token PLUS
%token MINUS
%token TIMES
%token SLASH
%token LPAREN
%token RPAREN
%token EQUALS
%token SEMICOLON
%token DEF
%token EXTERN
%token <sval> IDENTIFIER "identifier"
%token <dval> NUMBER "number"
%type <ast> expr
%type <ast> print
%type <ast> statement
%type <ast> assignment
%type <block> statements
%type <unit> unit

%%
%start program;

program
    : unit { driver.result = $1; }
    ;

unit
    : statements { $$ = new UnitASTNode($1); }
    ;

statements
    : statements statement      { $$ = $1; $$->append($2); }
    | /* Blackjack and girls */ { $$ = new BlockASTNode(); }
    ;

statement
    : assignment
    | print
    ;

assignment
    : IDENTIFIER EQUALS expr
      { $$ = new BinaryOpASTNode('=', new VariableASTNode(*$1), $3); }
    ;

print
    : PRINT expr { $$ = new PrintCallASTNode($2); }
    ;

%left PLUS MINUS;
%left TIMES SLASH;
expr
    : expr PLUS expr    { $$ = new BinaryOpASTNode('+', $1, $3); }
    | expr MINUS expr   { $$ = new BinaryOpASTNode('-', $1, $3); }
    | expr TIMES expr   { $$ = new BinaryOpASTNode('*', $1, $3); }
    | expr SLASH expr   { $$ = new BinaryOpASTNode('/', $1, $3); }
    | IDENTIFIER        { $$ = new VariableASTNode(*$1); }
    | NUMBER            { $$ = new NumberASTNode($1); }
    ;
%%

void yy::Parser::error(const yy::location& l, const std::string& m)
{
    driver.error(m);
}

ast.hpp

#ifndef AST_H
#define AST_H

#include <ostream>
#include <sstream>
#include <vector>
#include <map>

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IRBuilder.h"

#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

Value* errorV(const char *str);
Function* errorF(const char *str);

static IRBuilder<> builder(getGlobalContext());
static std::map<std::string, Value*> namedValues;

class HasToString
{
  virtual std::string toString() const = 0;

 public:
  friend std::ostream& operator<<(std::ostream& os, const HasToString& o)
  {
    os << o.toString();
    return os;
  }
};

class ValueNode
{
 public:
  virtual Value* codegen() const = 0;
};

class BasicBlockNode
{
 public:
  virtual BasicBlock* codegen() const = 0;
};

class ASTNode : public HasToString, public ValueNode
{
 public:
  virtual ~ASTNode() {}
};

class BlockASTNode : public ASTNode, public BasicBlockNode
{
  typedef std::vector<ASTNode*> Container;
  Container nodesList_;
 public:
  void append(ASTNode* node)
  {
    nodesList_.push_back(node);
  }

  size_t size() const
  {
    return nodesList_.size();
  }

  const ASTNode* operator[](size_t i) const
  {
    return nodesList_[i];
  }

  virtual ~BlockASTNode()
  {
    for (Container::const_iterator it = nodesList_.begin(); it != nodesList_.end(); ++it)
      delete *it;
  }

  virtual std::string toString() const;
  virtual BasicBlock* codegen() const;
};

class NumberASTNode : public ASTNode, public ValueNode
{
  const double val_;
 public:
  NumberASTNode(double val) : val_(val) {}

  virtual std::string toString() const;
  virtual Value* codegen() const;
};

class VariableASTNode : public ASTNode, public ValueNode
{
  const std::string name_;
 public:
  VariableASTNode(const std::string& name) : name_(name) {}

  std::string getName() const
  {
    return name_;
  }

  virtual std::string toString() const;
  virtual Value* codegen() const;
};

class BinaryOpASTNode : public ASTNode, public ValueNode
{
  const char op_;
  const ASTNode* lhs_;
  const ASTNode* rhs_;
 public:
  BinaryOpASTNode(const char op, const ASTNode* lhs, const ASTNode* rhs) :
      op_(op),
      lhs_(lhs),
      rhs_(rhs)
  {
  }

  virtual ~BinaryOpASTNode()
  {
    delete lhs_;
    delete rhs_;
  }

  virtual std::string toString() const;
  virtual Value* codegen() const;
};

class PrintCallASTNode : public ASTNode, public ValueNode
{
  const ASTNode* param_;
 public:
  PrintCallASTNode(const ASTNode* param):
      param_(param)
  {
  }

  virtual ~PrintCallASTNode()
  {
    delete param_;
  }

  virtual std::string toString() const;
  virtual Value* codegen() const;
};

class UnitASTNode: public HasToString
{
  const BlockASTNode* statements_;
 public:
  UnitASTNode(const BlockASTNode* statements):
      statements_(statements)
  {
  }

  virtual ~UnitASTNode()
  {
    delete statements_;
  }

  virtual std::string toString() const;
  virtual Module* codegen() const;
};

#endif

ast_codegen.cpp

#include "ast.hpp"
#include <iostream>

Function* mainFunction;
Function* printFunction;

BasicBlock* BlockASTNode::codegen() const
{
  BasicBlock *bb = BasicBlock::Create(getGlobalContext(), "bb", mainFunction);
  builder.SetInsertPoint(bb);
  for (Container::const_iterator it = nodesList_.begin();
       it != nodesList_.end();
       ++it)
    (*it)->codegen();
  return bb;
}

Value* NumberASTNode::codegen() const
{
  return ConstantFP::get(getGlobalContext(), APFloat(val_));
}

Value* VariableASTNode::codegen() const
{
  Value* v = namedValues[name_];
  return v ? v : errorV((std::string("Unknown variable name: ") + name_).c_str());
}

Value* BinaryOpASTNode::codegen() const
{
  Value* l;
  if (op_ != '=')
    l = lhs_->codegen();
  Value* r = rhs_->codegen();
  if (r == 0 || (op_ != '=' && l == 0)) return 0;

  if (op_ == '+')
    return builder.CreateFAdd(l, r, "addtmp");
  else if (op_ == '-')
    return builder.CreateFSub(l, r, "subtmp");
  else if (op_ == '*')
    return builder.CreateFMul(l, r, "multmp");
  else if (op_ == '/')
    return builder.CreateFDiv(l, r, "divtmp");
  else if (op_ == '=') {
    std::string varName(static_cast<const VariableASTNode*>(lhs_)->getName());
    AllocaInst* l_var = builder.CreateAlloca(
        Type::getDoubleTy(getGlobalContext()), 0, "alloca_" + varName);
    builder.CreateStore(r, l_var);
    namedValues[varName] = builder.CreateLoad(l_var, "var_" + varName);
    return namedValues[varName];
  } else
    return errorV("invalid binary operator");
}

Value* PrintCallASTNode::codegen() const
{
  std::vector<Value*> args(1, param_->codegen());
  return builder.CreateCall(printFunction, args);
}

Module* UnitASTNode::codegen() const
{
  Module* theModule = new Module("myModule", getGlobalContext());

  {
    std::vector<Type*> params(1, Type::getDoubleTy(getGlobalContext()));
    FunctionType* functionType = FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false);
    printFunction = Function::Create(functionType, Function::ExternalLinkage, "print",
                                     theModule);
  }

  {
    std::vector<Type*> params;
    FunctionType* functionType =
        FunctionType::get(Type::getInt32Ty(getGlobalContext()), params, false);
    mainFunction = Function::Create(functionType, Function::ExternalLinkage, "main",
                                    theModule);
  }

  statements_->codegen();

  builder.SetInsertPoint(&mainFunction->back());
  Value* res = ConstantInt::get(getGlobalContext(), APInt(32, 0));
  builder.CreateRet(res);
  theModule->dump();
  return theModule;
}

print.c

#include <stdio.h>

void print(double v)
{
  printf("%g\n", v);
}

Домашнее задание

Добавить в язык условные операторы if, else, а также возможность определять и вызывать свои функции.

comments powered by Disqus