Соберем воедино накопленные знания о flex, bison и llvm, чтобы реализовать простейший, но уже полноценный компилятор из языка с переменными, выражениями, присваиваниями и функцией print. Наш компилятор будет понимать, например, следующую программу:
a = 1
b = 2 + a
print 2 * a
print 2 * a * b
Все файлы можно скачать единым архивом. Ниже приведены лишь самые важные из них.
compile.sh
Реализуем "компилятор" в виде скрипта, сердцем которого будет наша программа compiler, генерирующая llvm байт-код на выходе.
#!/bin/bash
./compiler $1
llc-mp-3.2 -disable-cfi -mattr=-avx $1.bc
clang $1.s print.c -o $1.elf
Makefile
.PHONY: Build Lexer Parser
Build: Parser Lexer
clang++ -o compiler `llvm-config-mp-3.2 --cppflags --ldflags --libs`\
lexer.cpp parser.cpp driver.cpp compiler.cpp\
ast_codegen.cpp ast_tostring.cpp ast.cpp
Lexer: lexer.l
flex -o lexer.cpp lexer.l
Parser: parser.y
bison -d -o parser.cpp parser.y
compiler.cpp
#include <iostream>
#include "driver.hpp"
#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"
int main(int argc, const char* argv[])
{
Driver driver;
for (++argv; argv[0]; ++argv) {
if (*argv == std::string ("-p"))
driver.trace_parsing = true;
else if (*argv == std::string ("-s"))
driver.trace_scanning = true;
else if (!driver.parse(*argv)) {
std::cout << "; AST: " << *driver.result << std::endl;
Module* theModule = driver.result->codegen();
std::string errorString;
raw_fd_ostream bitcode((std::string(*argv) + ".bc").c_str(), errorString, 0);
WriteBitcodeToFile(theModule, bitcode);
bitcode.close();
}
}
return 0;
}
parser.y
%skeleton "lalr1.cc"
%require "2.5"
%defines
%define parser_class_name "Parser"
%code requires {
#include <string>
#include "ast.hpp"
class Driver;
}
%parse-param { Driver& driver }
%lex-param { Driver& driver }
%debug
%error-verbose
%union {
double dval;
std::string* sval;
ASTNode* ast;
NumberASTNode* num;
VariableASTNode* var;
BlockASTNode* block;
BinaryOpASTNode* binOp;
UnitASTNode* unit;
}
%code {
#include "driver.hpp"
}
%token END 0 "end of file"
%token PRINT
%token PLUS
%token MINUS
%token TIMES
%token SLASH
%token LPAREN
%token RPAREN
%token EQUALS
%token SEMICOLON
%token DEF
%token EXTERN
%token <sval> IDENTIFIER "identifier"
%token <dval> NUMBER "number"
%type <ast> expr
%type <ast> print
%type <ast> statement
%type <ast> assignment
%type <block> statements
%type <unit> unit
%%
%start program;
program
: unit { driver.result = $1; }
;
unit
: statements { $$ = new UnitASTNode($1); }
;
statements
: statements statement { $$ = $1; $$->append($2); }
| /* Blackjack and girls */ { $$ = new BlockASTNode(); }
;
statement
: assignment
| print
;
assignment
: IDENTIFIER EQUALS expr
{ $$ = new BinaryOpASTNode('=', new VariableASTNode(*$1), $3); }
;
print
: PRINT expr { $$ = new PrintCallASTNode($2); }
;
%left PLUS MINUS;
%left TIMES SLASH;
expr
: expr PLUS expr { $$ = new BinaryOpASTNode('+', $1, $3); }
| expr MINUS expr { $$ = new BinaryOpASTNode('-', $1, $3); }
| expr TIMES expr { $$ = new BinaryOpASTNode('*', $1, $3); }
| expr SLASH expr { $$ = new BinaryOpASTNode('/', $1, $3); }
| IDENTIFIER { $$ = new VariableASTNode(*$1); }
| NUMBER { $$ = new NumberASTNode($1); }
;
%%
void yy::Parser::error(const yy::location& l, const std::string& m)
{
driver.error(m);
}
ast.hpp
#ifndef AST_H
#define AST_H
#include <ostream>
#include <sstream>
#include <vector>
#include <map>
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IRBuilder.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
Value* errorV(const char *str);
Function* errorF(const char *str);
static IRBuilder<> builder(getGlobalContext());
static std::map<std::string, Value*> namedValues;
class HasToString
{
virtual std::string toString() const = 0;
public:
friend std::ostream& operator<<(std::ostream& os, const HasToString& o)
{
os << o.toString();
return os;
}
};
class ValueNode
{
public:
virtual Value* codegen() const = 0;
};
class BasicBlockNode
{
public:
virtual BasicBlock* codegen() const = 0;
};
class ASTNode : public HasToString, public ValueNode
{
public:
virtual ~ASTNode() {}
};
class BlockASTNode : public ASTNode, public BasicBlockNode
{
typedef std::vector<ASTNode*> Container;
Container nodesList_;
public:
void append(ASTNode* node)
{
nodesList_.push_back(node);
}
size_t size() const
{
return nodesList_.size();
}
const ASTNode* operator[](size_t i) const
{
return nodesList_[i];
}
virtual ~BlockASTNode()
{
for (Container::const_iterator it = nodesList_.begin(); it != nodesList_.end(); ++it)
delete *it;
}
virtual std::string toString() const;
virtual BasicBlock* codegen() const;
};
class NumberASTNode : public ASTNode, public ValueNode
{
const double val_;
public:
NumberASTNode(double val) : val_(val) {}
virtual std::string toString() const;
virtual Value* codegen() const;
};
class VariableASTNode : public ASTNode, public ValueNode
{
const std::string name_;
public:
VariableASTNode(const std::string& name) : name_(name) {}
std::string getName() const
{
return name_;
}
virtual std::string toString() const;
virtual Value* codegen() const;
};
class BinaryOpASTNode : public ASTNode, public ValueNode
{
const char op_;
const ASTNode* lhs_;
const ASTNode* rhs_;
public:
BinaryOpASTNode(const char op, const ASTNode* lhs, const ASTNode* rhs) :
op_(op),
lhs_(lhs),
rhs_(rhs)
{
}
virtual ~BinaryOpASTNode()
{
delete lhs_;
delete rhs_;
}
virtual std::string toString() const;
virtual Value* codegen() const;
};
class PrintCallASTNode : public ASTNode, public ValueNode
{
const ASTNode* param_;
public:
PrintCallASTNode(const ASTNode* param):
param_(param)
{
}
virtual ~PrintCallASTNode()
{
delete param_;
}
virtual std::string toString() const;
virtual Value* codegen() const;
};
class UnitASTNode: public HasToString
{
const BlockASTNode* statements_;
public:
UnitASTNode(const BlockASTNode* statements):
statements_(statements)
{
}
virtual ~UnitASTNode()
{
delete statements_;
}
virtual std::string toString() const;
virtual Module* codegen() const;
};
#endif
ast_codegen.cpp
#include "ast.hpp"
#include <iostream>
Function* mainFunction;
Function* printFunction;
BasicBlock* BlockASTNode::codegen() const
{
BasicBlock *bb = BasicBlock::Create(getGlobalContext(), "bb", mainFunction);
builder.SetInsertPoint(bb);
for (Container::const_iterator it = nodesList_.begin();
it != nodesList_.end();
++it)
(*it)->codegen();
return bb;
}
Value* NumberASTNode::codegen() const
{
return ConstantFP::get(getGlobalContext(), APFloat(val_));
}
Value* VariableASTNode::codegen() const
{
Value* v = namedValues[name_];
return v ? v : errorV((std::string("Unknown variable name: ") + name_).c_str());
}
Value* BinaryOpASTNode::codegen() const
{
Value* l;
if (op_ != '=')
l = lhs_->codegen();
Value* r = rhs_->codegen();
if (r == 0 || (op_ != '=' && l == 0)) return 0;
if (op_ == '+')
return builder.CreateFAdd(l, r, "addtmp");
else if (op_ == '-')
return builder.CreateFSub(l, r, "subtmp");
else if (op_ == '*')
return builder.CreateFMul(l, r, "multmp");
else if (op_ == '/')
return builder.CreateFDiv(l, r, "divtmp");
else if (op_ == '=') {
std::string varName(static_cast<const VariableASTNode*>(lhs_)->getName());
AllocaInst* l_var = builder.CreateAlloca(
Type::getDoubleTy(getGlobalContext()), 0, "alloca_" + varName);
builder.CreateStore(r, l_var);
namedValues[varName] = builder.CreateLoad(l_var, "var_" + varName);
return namedValues[varName];
} else
return errorV("invalid binary operator");
}
Value* PrintCallASTNode::codegen() const
{
std::vector<Value*> args(1, param_->codegen());
return builder.CreateCall(printFunction, args);
}
Module* UnitASTNode::codegen() const
{
Module* theModule = new Module("myModule", getGlobalContext());
{
std::vector<Type*> params(1, Type::getDoubleTy(getGlobalContext()));
FunctionType* functionType = FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false);
printFunction = Function::Create(functionType, Function::ExternalLinkage, "print",
theModule);
}
{
std::vector<Type*> params;
FunctionType* functionType =
FunctionType::get(Type::getInt32Ty(getGlobalContext()), params, false);
mainFunction = Function::Create(functionType, Function::ExternalLinkage, "main",
theModule);
}
statements_->codegen();
builder.SetInsertPoint(&mainFunction->back());
Value* res = ConstantInt::get(getGlobalContext(), APInt(32, 0));
builder.CreateRet(res);
theModule->dump();
return theModule;
}
print.c
#include <stdio.h>
void print(double v)
{
printf("%g\n", v);
}
Домашнее задание
Добавить в язык условные операторы if, else, а также возможность определять и вызывать свои функции.