Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Imugi: Compiler made with Python

456 views

Published on

[PyConKR 2018] Imugi: Compiler made with Python.
https://www.pycon.kr/2018/program/2/

GitHub: https://github.com/hahnlee/imugi

Published in: Engineering
  • Be the first to comment

Imugi: Compiler made with Python

  1. 1. !
  2. 2. Compiler construction is a microcosm of computer science
  3. 3. a = 1 # ref_cnt of a = 1 b = a # ref_cnt of a = 2 c = a + b # ref_cnt of a 2 -> 3 -> 2 sum_function(a) # ref_cnt of a 3
  4. 4. # A very basic HTTP server require "http/server" server = HTTP::Server.new do |context| context.response.content_type = "text/plain" context.response.print "Hello world, got #{context.request.path}!" end puts "Listening on http://127.0.0.1:8080" server.listen(8080)
  5. 5. ! " with open('foo.py', 'rb') as f: import ast
  6. 6. import tokenize import ast
  7. 7. ! " import unittest import ctypes import this
  8. 8. AST = 'is Tree' String Token(type=NAME, value='AST') Token(type=OP, value='=') Token(type=STRING, value="'is Tree'") Tokens
  9. 9. keywords = {'if', 'else', 'return'} import re token_specification = [ ('NUMBER', r'\d+(\.\d*)?'), # Integer or decimal number ('ASSIGN', r'='), # Assignment operator ('ID', r'[A-Za-z]+'), # Identifiers ('OP', r'[+\-*/]'), # Arithmetic operators ('NEWLINE', r'\n'), # Line endings ('SKIP', r'[ \t]+'), # Skip over spaces and tabs ('MISMATCH',r'.'), # Any other character ] tok_regex = '|'.join( '(?P<%s>%s)' % pair for pair in token_specification ) def tokenize(code): for mo in re.finditer(tok_regex, code): kind = mo.lastgroup value = mo.group(kind) if kind == 'NEWLINE': elif kind == 'SKIP': pass elif kind == 'MISMATCH': raise RuntimeError(f'{value!r} unexpected') else: if kind == 'ID' and value in keywords: kind = value column = mo.start() - line_start yield Token(kind, value)
  10. 10. import tokenize def say_hello(): print("Hello, World!") 0,0-0,0: ENCODING 'utf-8' 1,0-1,3: NAME 'def' 1,4-1,13: NAME 'say_hello' 1,13-1,14: OP '(' 1,14-1,15: OP ')' 1,15-1,16: OP ':' 1,16-1,17: NEWLINE '\n' 2,0-2,4: INDENT '    ' 2,4-2,9: NAME 'print' 2,9-2,10: OP '(' 2,10-2,25: STRING '"Hello, World!"' 2,25-2,26: OP ')' 2,26-2,27: NEWLINE '\n' 3,0-3,0: DEDENT '' 3,0-3,0: ENDMARKER ''
  11. 11. Assign( targets=[ Name(id='AST') ], value=Str(value="'is Tree'"), ) Token(type=NAME, value='AST') Token(type=OP, value='=') Token(type=STRING, value="'is Tree'") Tokens AST Bin OP Bin OP + a * cb a + b * c
  12. 12. token = tokenize(code) while token is not None: if token.type == NAME: if token.value == 'def': tree = FunctionDef() next_token = token.next() token = token.next() Token(type=NUMBER, value='0') Token(type=NAME, value='sum') if next_token.type != NAME: raise SyntaxError()
  13. 13. AST std::string AST = "is Tree" Target Code Assign( targets=[ Name(id='AST') ], value=Str(value="'is Tree'"), ) b b, c mul $t0, b, c add $t1, $t0, a ASM Bin OP Bin OP + a * cb
  14. 14. Bin OP Bin OP + a * cb stmt = FunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns) | AsyncFunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns) | ClassDef(identifier name, expr* bases, keyword* keywords, stmt* body, expr* decorator_list) | Return(expr? value) | Delete(expr* targets) | Assign(expr* targets, expr value) | AugAssign(expr target, operator op, expr value) -- 'simple' indicates that we annotate simple name without parens | AnnAssign(expr target, expr annotation, expr? value, int simple) -- use 'orelse' because else is a keyword in target languages | For(expr target, expr iter, stmt* body, stmt* orelse) | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse) | While(expr test, stmt* body, stmt* orelse) | If(expr test, stmt* body, stmt* orelse) | With(withitem* items, stmt* body) | AsyncWith(withitem* items, stmt* body) | Raise(expr? exc, expr? cause) | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) | Assert(expr test, expr? msg) | Import(alias* names) | ImportFrom(identifier? module, alias* names, int? level) | Global(identifier* names) | Nonlocal(identifier* names) | Expr(expr value) | Pass | Break | Continue class NodeVisitor(object): def visit(self, node): """Visit a node.""" method = 'visit_' + node.__class__.__name__ visitor = getattr(self, method, self.generic_visit) return visitor(node) visit_Num() visit_Return()
  15. 15. symbol_table = {} Assign( targets=[ Name(id='foo') ], value=Str(value="'bar'"), ) target_code = '' symbol_table = { 'foo': str, } target_code = "string foo = 'bar'" Symbol(type=[str], pointer=some_location, etc=[]) symbol_table = { 'foo': str, } Name(id='a') raise SyntaxError()
  16. 16. int *ptr_one; ptr_one = (int *) malloc(sizeof(int)); free(ptr_one) foo = 100000 # ... # Automatically release register int foo = 42; • •
  17. 17. 10 // 4 10 // 4 * 3 10 >> 2 10 - 10 >> 2 foo = {} foo['a'] = 1 foo['b'] = 2 foo['c'] = 3 foo = { 'a': 1, 'b': 2, 'c': 3, } ⚙
  18. 18. add $s0, $t0, $t1 sub $t2, $s0, $t3 add $t3, $s0, $t4 and $t7, $t5, $t4 add $s0, $t0, $t1 and $t7, $t5, $t4 sub $t2, $s0, $t3 add $t3, $s0, $t4
  19. 19. A microcosm of computer science
  20. 20. typedef struct { char *tp_name; } A; typedef struct { char *tp_name; int value; float rate; } B; unsigned add1(unsigned a, unsigned b) { return a + b; } unsigned add2(unsigned a, unsigned b) { if (a == 0) return b; return add2(a - 1, b + 1); } %struct.A = type { i8* } %struct.B = type { i8*, i32, float } define i32 @add1(i32 %a, i32 %b) { entry: %tmp1 = add i32 %a, %b ret i32 %tmp1 } define i32 @add2(i32 %a, i32 %b) { entry: %tmp1 = icmp eq i32 %a, 0 br i1 %tmp1, label %done, label %recurse recurse: %tmp2 = sub i32 %a, 1 %tmp3 = add i32 %b, 1 %tmp4 = call i32 @add2(i32 %tmp2, i32 %tmp3) ret i32 %tmp4 done: ret i32 %b }
  21. 21. from numba import jit from numpy import arange @jit def sum2d(arr): M, N = arr.shape result = 0.0 for i in range(M): for j in range(N): result += arr[i,j] return result a = arange(9).reshape(3,3) print(sum2d(a))
  22. 22. from llvmlite import ir # Create some useful types double = ir.DoubleType() fnty = ir.FunctionType(double, (double, double)) # Create an empty module... module = ir.Module(name=__file__) # and declare a function named "fpadd" inside it func = ir.Function(module, fnty, name="fpadd") # Now implement the function block = func.append_basic_block(name="entry") builder = ir.IRBuilder(block) a, b = func.args result = builder.fadd(a, b, name="res") builder.ret(result) # Print the module IR print(module)
  23. 23. ; ModuleID = "examples/ir_fpadd.py" target triple = "unknown-unknown-unknown" target datalayout = "" define double @"fpadd"(double %".1", double %".2") { entry: %"res" = fadd double %".1", %".2" ret double %"res" } from llvmlite import ir # Create some useful types double = ir.DoubleType() fnty = ir.FunctionType(double, (double, double)) # Create an empty module... module = ir.Module(name=__file__) # and declare a function named "fpadd" inside it func = ir.Function(module, fnty, name="fpadd") # Now implement the function block = func.append_basic_block(name="entry") builder = ir.IRBuilder(block) a, b = func.args result = builder.fadd(a, b, name="res") builder.ret(result) # Print the module IR print(module)
  24. 24. import llvmlite.binding as llvm llvm.initialize() llvm.initialize_native_target() llvm.initialize_native_asmprinter() def create_execution_engine(): target = llvm.Target.from_default_triple() target_machine = target.create_target_machine() backing_mod = llvm.parse_assembly("") engine = llvm.create_mcjit_compiler(backing_mod, target_machine) return engine def compile_ir(engine, llvm_ir): mod = llvm.parse_assembly(llvm_ir) mod.verify() engine.add_module(mod) engine.finalize_object() engine.run_static_constructors() return mod engine = create_execution_engine() mod = compile_ir(engine, llvm_ir) from ctypes import CFUNCTYPE, c_double func_ptr = engine.get_function_address("fpadd") cfunc = CFUNCTYPE(c_double, c_double, c_double)(func_ptr) res = cfunc(1.0, 3.5)
  25. 25. !
  26. 26. from ast import ( FunctionDef, NodeVisitor, ) from llvmlite import ( ir, ) class CodeGen(NodeVisitor): def visit_FunctionDef(self, node: FunctionDef): func_name = node.name func_args_types = [self.get_type(arg.annotation.id) for arg in node.args.args] func_return_type = self.get_type(node.returns.id) func_type = ir.FunctionType(func_return_type, func_args_types) func = ir.Function(self.module, func_type, func_name)
  27. 27. def sum(a: int, b: int) -> int: return a + b * 3 + 4 define i32 @sum(i32 %a, i32 %b) { entry: %multmp = mul i32 %b, 3 %addtmp = add i32 %a, 4 %addtmp.1 = add i32 %addtmp, %multmp ret i32 %addtmp.1 } _sum: leal (%rsi,%rsi,2), %eax leal 4(%rdi,%rax), %eax retq
  28. 28. Primitive Type VS Object Need Runtime or Not? CPython’s CAPI or not? RC VS GC
  29. 29. typedef struct _object { Py_ssize_t ob_refcnt; struct _typeobject *ob_type; } PyObject; typedef struct { PyObject ob_base; Py_ssize_t ob_size; } PyVarObject; typedef struct _longobject { PyVarObject ob_base; digit ob_digit[1]; } PyLongObject; static PyObject * some_function() { return (PyObject *) PyLongObject …; }
  30. 30. foo = 1 isinstance(foo, int) foo = 'bar' isinstance(foo, str) foo = True isinstance(foo, bool) class NodeVisitor(object): def visit(self, node): """Visit a node.""" method = 'visit_' + node.__class__.__name__ visitor = getattr(self, method, self.generic_visit) return visitor(node)
  31. 31. class A: pass foo = A() print(foo.bar) #'A' object has no attribute 'bar' bar = A() bar.bar = 3 print(bar.bar) # 3 print(foo.bar) # 'A' object has no attribute 'bar' A.foo = 10 print(foo.foo) # 10 print(bar.foo) # 10 class TestClass(object): name = 'TestClass' foo = TestClass() class TestClass(object): nickname = 'TestClass2' bar = TestClass() print(foo.name) # TestClass print(foo.nickname) # 'TestClass' object has no attribute 'nickname' print(bar.name) # 'TestClass' object has no attribute 'name' print(bar.nickname) # TestClass2
  32. 32. § § § §
  33. 33. def foo(bar: int) -> int: pass
  34. 34. !

×