昨日に引き続き、「Javaでコンパイラの基礎を理解する*1」に取り組みました。
svm1c.rb
# Compiles a tiny source language of the form `main{ <arithmetic expression> }`
# into bytecode for a stack machine and writes it to a file.
#
# Every opcode and operand is written as one signed 8-bit value
# (see the "c*" pack directive in #write).
class Compiler
  IADD = 96
  ISUB = 100
  IMUL = 104
  IDIV = 108
  BIPUSH = 16
  PRINT = -48

  # Scans, parses and compiles +filename+, appends a PRINT instruction and
  # writes the resulting bytecode to +output+.
  #
  # @param filename [String] path of the source file to compile
  # @param output [String] path of the bytecode file (defaults to "a.svm")
  # @return [Integer] number of bytes written
  # @raise [Parser::SyntaxError] when the token stream is not a valid program
  def compile(filename, output = "a.svm")
    tokenList = Scanner.new(filename).createTokenList
    ae = Parser.new.execute(tokenList)
    objectCodeList = []
    ae.compile(objectCodeList)
    objectCodeList << PRINT
    write(output, objectCodeList)
  end

  private

  # Packs +code+ as signed bytes and writes it to +filename+.
  # Binary mode keeps the bytes intact on platforms that translate newlines
  # in text mode (a byte value of 10 would otherwise be rewritten).
  def write(filename, code)
    File.open(filename, "wb") { |f| f.write(code.pack("c*")) }
  end
end

# Recursive-descent parser for the grammar
#
#   program    := "main" "{" expression "}"
#   expression := term   (("+" | "-") term)*
#   term       := factor (("*" | "/") factor)*
#   factor     := NUMBER | "(" expression ")"
#
# All create* methods consume tokens from the front of the list they are given.
class Parser
  # Raised when the token stream does not match the grammar, including when
  # the input ends before the program is complete.
  class SyntaxError < StandardError; end

  # Parses a complete program.
  #
  # @param list [Array<Token>] tokens; consumed destructively
  # @return [Program] root of the syntax tree
  # @raise [SyntaxError] on malformed or truncated input
  def execute(list)
    createProgram(list)
  end

  # Parses `main { expression }`. Tokens that follow the closing brace are
  # not examined.
  def createProgram(list)
    t = list.shift
    raise SyntaxError, "expected keyword" if t.nil? || t.type != TokenUtil::KEYWORD
    raise SyntaxError, "expected main" if "main" != t.s

    expect(list, TokenUtil::L_BRACE, "{")
    e = createExpression(list)
    expect(list, TokenUtil::R_BRACE, "}")

    program = Program.new
    program.value = t
    program.add(e)
    program
  end

  # Parses a left-associative chain of terms joined by "+" or "-".
  def createExpression(list)
    parse_left_assoc(list, TokenUtil::OPE_AS, Expression) { createTerm(list) }
  end

  # Parses a left-associative chain of factors joined by "*" or "/".
  def createTerm(list)
    parse_left_assoc(list, TokenUtil::OPE_MD, Term) { createFactor(list) }
  end

  # Parses a number literal or a parenthesised expression.
  def createFactor(list)
    return createNumber(list) if check(list, TokenUtil::NUMBER)

    expect(list, TokenUtil::L_PAREN, "(")
    e = createExpression(list)
    expect(list, TokenUtil::R_PAREN, ")")
    e
  end

  # Wraps the next token in a NumberExpression.
  def createNumber(list)
    e = NumberExpression.new
    e.value = list.shift
    e
  end

  private

  # True when the next token exists and has the given type.
  # An empty list yields false, so callers raise SyntaxError on truncated
  # input instead of failing with NoMethodError on nil.
  def check(list, type)
    !list.empty? && list.first.type == type
  end

  # Consumes and returns the next token, which must have the given type.
  def expect(list, type, what)
    raise SyntaxError, "expected #{what}" unless check(list, type)

    list.shift
  end

  # Shared loop for binary operators: reads one operand via the block, then,
  # while the next token is an operator of +operator_type+, folds the operand
  # read so far and the next one into a new +node_class+ node.
  def parse_left_assoc(list, operator_type, node_class)
    left = yield
    while check(list, operator_type)
      node = node_class.new
      node.value = list.shift
      node.add(left)
      node.add(yield)
      left = node
    end
    left
  end
end

# Base class of all syntax-tree nodes. +value+ holds the token the node was
# built from (it is an empty array until assigned); +children+ holds the
# operand nodes. #add and #compile do nothing here and are overridden by
# subclasses as needed.
class AbstractExpression
  attr_accessor :value, :children

  def initialize
    @value = []
    @children = []
  end

  def add(node); end

  def compile(objectCodeList); end
end

# Root node; compiles its first child.
class Program < AbstractExpression
  def add(e)
    @children << e
  end

  def compile(objectCodeList)
    @children[0].compile(objectCodeList)
  end
end

# Additive node ("+" / "-"): emits both operands, then the operator opcode.
class Expression < AbstractExpression
  OPCODES = { "+" => Compiler::IADD, "-" => Compiler::ISUB }.freeze

  def add(e)
    @children << e
  end

  # Appends the code of the first child; when a second child is present,
  # appends its code followed by the opcode for the operator in +value+.
  # Nothing is appended for an operator that is not in OPCODES.
  def compile(objectCodeList)
    @children[0].compile(objectCodeList)
    return unless @children.size > 1

    @children[1].compile(objectCodeList)
    opcode = OPCODES[@value.s]
    objectCodeList << opcode if opcode
  end
end

# Multiplicative node ("*" / "/"): emits both operands, then the operator opcode.
class Term < AbstractExpression
  OPCODES = { "*" => Compiler::IMUL, "/" => Compiler::IDIV }.freeze

  def add(e)
    @children << e
  end

  # Appends the code of the first child; when a second child is present,
  # appends its code followed by the opcode for the operator in +value+.
  # Nothing is appended for an operator that is not in OPCODES.
  def compile(objectCodeList)
    @children[0].compile(objectCodeList)
    return unless @children.size > 1

    @children[1].compile(objectCodeList)
    opcode = OPCODES[@value.s]
    objectCodeList << opcode if opcode
  end
end

# Number literal: emits BIPUSH followed by the literal value.
class NumberExpression < AbstractExpression
  # The operand is later packed as one signed byte by Compiler, so literals
  # outside -128..127 do not survive unchanged in the output file.
  def compile(objectCodeList)
    objectCodeList << Compiler::BIPUSH
    objectCodeList << @value.n
  end
end

# Reads a source file line by line and turns it into a flat token list.
class Scanner
  attr_accessor :current
  attr_reader :filename

  # @param filename [String] path of the source file
  def initialize(filename)
    @current = 0
    @filename = filename
  end

  # @return [Array<Token>] all tokens of the file, in source order
  def createTokenList
    readLines
  end

  # Tokenizes every non-empty line. Each token is tagged with its 1-based
  # line number (+current+ counts the lines read so far) and the 0-based
  # index within the line at which it starts.
  def readLines
    tokenList = []
    File.open(@filename) do |f|
      f.each_line do |raw|
        text = raw.chomp
        @current += 1
        next if text.empty?

        S1sTokenizer.new(text).each do |token, index|
          token.indexNumber = index
          token.lineNumber = @current
          tokenList << token
        end
      end
    end
    tokenList
  end
end

# Splits a single source line into tokens.
class S1sTokenizer
  attr_reader :line

  # @param s [String] one line of source text, without its line terminator
  def initialize(s)
    @line = s
  end

  # Yields each token of the line together with the index at which it starts.
  #
  # @yieldparam token [Token]
  # @yieldparam index [Integer] 0-based start position within the line
  def each
    index = 0
    while index < @line.size
      ch = @line[index, 1]
      if isdigit?(ch)
        # Number literal: a run of decimal digits.
        digits = scan_run(index) { |c| isdigit?(c) }
        yield Token.new(TokenUtil::NUMBER, digits.to_i), index
        index += digits.size
      elsif isletter?(ch)
        # Word made of 'a'..'z': either a keyword or an identifier.
        word = scan_run(index) { |c| isletter?(c) }
        type = TokenUtil.isKeyword?(word) ? TokenUtil::KEYWORD : TokenUtil::IDENTIFIER
        yield Token.new(type, word), index
        index += word.size
      elsif isspace?(ch)
        # Space or tab: skipped.
        index += 1
      else
        # Braces, parentheses, operators; anything else becomes an ERROR token.
        yield Token.new(symbol_type(ch), ch), index
        index += 1
      end
    end
  end

  def isdigit?(ch)
    ch >= "0" && ch <= "9"
  end

  def isletter?(ch)
    ch >= "a" && ch <= "z"
  end

  def isspace?(ch)
    ch == " " || ch == "\t" || ch == ""
  end

  private

  # Returns the longest substring starting at +start+ whose characters all
  # satisfy the given block.
  def scan_run(start)
    stop = start
    stop += 1 while stop < @line.size && yield(@line[stop, 1])
    @line[start...stop]
  end

  # Maps a single punctuation character to its token type.
  def symbol_type(ch)
    case ch
    when "{" then TokenUtil::L_BRACE
    when "}" then TokenUtil::R_BRACE
    when "(" then TokenUtil::L_PAREN
    when ")" then TokenUtil::R_PAREN
    when "+", "-" then TokenUtil::OPE_AS
    when "*", "/" then TokenUtil::OPE_MD
    else TokenUtil::ERROR
    end
  end
end

# A lexical token. A String payload is stored in +s+, any other payload
# (the numeric value of a number literal) in +n+.
class Token
  attr_accessor :type, :s, :n
  attr_accessor :lineNumber, :indexNumber

  # @param t [Integer] one of the TokenUtil type constants
  # @param c [String, Numeric] token text or numeric value
  def initialize(t, c)
    @type = t
    if c.instance_of?(String)
      @s = c
    else
      @n = c
    end
  end
end

# Token type constants and lookup helpers.
class TokenUtil
  ERROR = 0
  NUMBER = 1
  OPE_AS = 2 # + -
  OPE_MD = 3 # * /
  L_PAREN = 4 # (
  R_PAREN = 5 # )
  L_BRACE = 6 # {
  R_BRACE = 7 # }
  KEYWORD = 8 # reserved word
  IDENTIFIER = 9 # identifier

  KEYWORDS = %w| main |.freeze
  SYMBOLS = %w| + - * / ( ) { } |.freeze

  # @return [Boolean] whether +s+ is a reserved word
  def self.isKeyword?(s)
    KEYWORDS.include?(s)
  end

  # @return [Boolean] whether +s+ is one of the punctuation symbols
  def self.isSymbol?(s)
    SYMBOLS.include?(s)
  end
end

if __FILE__ == $0
  if ARGV.empty?
    # Without a source path there is nothing to compile; say so instead of
    # passing nil to File.open.
    warn "usage: ruby #{$0} SOURCE_FILE"
  else
    Compiler.new.compile(ARGV[0])
  end
end
実行結果
mmasa@debian:~/work/ruby/svm$ cat sample.sls
main{ 13*(4 + 12) }
mmasa@debian:~/work/ruby/svm$ ruby svm1c.rb sample.sls
mmasa@debian:~/work/ruby/svm$ ruby svm1.rb a.svm
208
今回、この題材に取り組んでみて、
と実感しました。