Mae向きなブログ

Mae向きな情報発信を続けていきたいと思います。

昨日の続き…

昨日に引き続き、「Javaコンパイラの基礎を理解する*1」に取り組みました。

svm1c.rb

# Drives the whole pipeline: scan a source file, parse the tokens, compile the
# resulting tree into a list of byte values and write them to an object file.
class Compiler
  # Byte values appended to the object code list by the expression nodes
  # (and, for PRINT, by #compile itself).
  IADD = 96
  ISUB = 100
  IMUL = 104
  IDIV = 108
  BIPUSH = 16
  PRINT = -48

  # Compiles +filename+ and writes the object code to +output+.
  #
  # filename:: path of the source file handed to Scanner
  # output::   path of the object file to create; defaults to "a.svm",
  #            which is the name the original one-argument form always used
  #
  # Returns the value of #write (the number of bytes written).
  def compile(filename, output = "a.svm")
    tokenList = Scanner.new(filename).createTokenList
    ae = Parser.new.execute(tokenList)
    objectCodeList = []
    ae.compile(objectCodeList)
    objectCodeList << PRINT     # every program ends with a PRINT byte
    write(output, objectCodeList)
  end

  private

  # Packs +code+ as signed 8-bit values ("c*") and writes the result to
  # +filename+. The file is opened in binary mode ("wb") so the packed bytes
  # are stored verbatim, without newline or encoding translation.
  def write(filename, code)
    File.open(filename, "wb") do |f|
      f.write(code.pack("c*"))
    end
  end
end

# Recursive-descent parser that turns a token list into an expression tree.
#
# Grammar handled by the methods below:
#   program    := "main" "{" expression "}"
#   expression := term   { ("+" | "-") term }
#   term       := factor { ("*" | "/") factor }
#   factor     := NUMBER | "(" expression ")"
#
# Every method consumes tokens from the front of +list+ (the list is mutated).
class Parser
  # Raised when the token list does not match the grammar. Note that inside
  # Parser this name refers to Parser::SyntaxError, not ::SyntaxError.
  class SyntaxError < StandardError; end

  # Parses a whole program. Returns the root Program node.
  # Raises Parser::SyntaxError on malformed or truncated input.
  def execute(list)
    createProgram(list)
  end

  # program := "main" "{" expression "}"
  # Returns a Program whose value is the "main" token and whose only child is
  # the parsed expression.
  def createProgram(list)
    t = list.shift
    # t is nil when the list is empty; treat that as a syntax error too.
    unless t && t.type == TokenUtil::KEYWORD && t.s == "main"
      raise SyntaxError, "expected 'main'"
    end
    expect(list, TokenUtil::L_BRACE, "'{'")
    e = createExpression(list)
    expect(list, TokenUtil::R_BRACE, "'}'")
    program = Program.new
    program.value = t
    program.add(e)
    program
  end

  # expression := term { ("+" | "-") term }
  # Returns a single term node, or a left-nested chain of Expression nodes.
  def createExpression(list)
    createLeftAssoc(list, TokenUtil::OPE_AS, Expression) { createTerm(list) }
  end

  # term := factor { ("*" | "/") factor }
  # Returns a single factor node, or a left-nested chain of Term nodes.
  def createTerm(list)
    createLeftAssoc(list, TokenUtil::OPE_MD, Term) { createFactor(list) }
  end

  # factor := NUMBER | "(" expression ")"
  def createFactor(list)
    return createNumber(list) if check(list, TokenUtil::NUMBER)

    expect(list, TokenUtil::L_PAREN, "a number or '('")
    e = createExpression(list)
    expect(list, TokenUtil::R_PAREN, "')'")
    e
  end

  # Consumes one token and wraps it in a NumberExpression.
  def createNumber(list)
    e = NumberExpression.new
    e.value = list.shift
    e
  end

  private

  # Parses "operand { op operand }" where op is any token of +opType+ and each
  # operand is produced by the given block. Each operator creates a new
  # +nodeClass+ node whose children are the tree built so far and the next
  # operand, which makes the operators left-associative.
  def createLeftAssoc(list, opType, nodeClass)
    left = yield
    while check(list, opType)
      op = list.shift
      right = yield
      node = nodeClass.new
      node.value = op
      node.add(left)
      node.add(right)
      left = node
    end
    left
  end

  # Consumes and returns the next token if it has +type+; otherwise raises
  # Parser::SyntaxError mentioning +description+.
  def expect(list, type, description)
    raise SyntaxError, "expected #{description}" unless check(list, type)
    list.shift
  end

  # True when the next token exists and has +type+. An exhausted list simply
  # answers false so callers report a SyntaxError instead of crashing on nil.
  def check(list, type)
    !list.empty? && list.first.type == type
  end
end

# Base class of all tree nodes. It stores a value and a list of children;
# #add and #compile are no-op hooks that subclasses override.
class AbstractExpression
  attr_accessor :value, :children

  # Both attributes start out as (separate) empty arrays.
  def initialize
    @value, @children = [], []
  end

  # Hook for attaching a child node; does nothing here and returns nil.
  def add(node)
    nil
  end

  # Hook for emitting object code into +objectCodeList+; does nothing here
  # and returns nil.
  def compile(objectCodeList)
    nil
  end
end

# Root node of the tree. Compiling it compiles its first child.
class Program < AbstractExpression
  # Appends +e+ to the children and returns the children array.
  def add(e)
    @children.push(e)
  end

  # Delegates code generation to the first child.
  def compile(objectCodeList)
    @children.first.compile(objectCodeList)
  end
end

# Node for "+" / "-". Its value is the operator token and its children are
# the left and right operands.
class Expression < AbstractExpression
  # Appends +e+ to the children and returns the children array.
  def add(e)
    @children.push(e)
  end

  # Emits code for the left operand, then (when there is a second child) for
  # the right operand followed by IADD or ISUB depending on the operator
  # text. Any other operator text emits no opcode.
  def compile(objectCodeList)
    left, right = @children
    left.compile(objectCodeList)
    return unless @children.size > 1

    right.compile(objectCodeList)
    case @value.s
    when "+" then objectCodeList << Compiler::IADD
    when "-" then objectCodeList << Compiler::ISUB
    end
  end
end

# Node for "*" / "/". Its value is the operator token and its children are
# the left and right operands.
class Term < AbstractExpression
  # Appends +e+ to the children and returns the children array.
  def add(e)
    @children.push(e)
  end

  # Emits code for the left operand, then (when there is a second child) for
  # the right operand followed by IMUL or IDIV depending on the operator
  # text. Any other operator text emits no opcode.
  def compile(objectCodeList)
    lhs, rhs = @children
    lhs.compile(objectCodeList)
    return unless @children.size > 1

    rhs.compile(objectCodeList)
    case @value.s
    when "*" then objectCodeList << Compiler::IMUL
    when "/" then objectCodeList << Compiler::IDIV
    end
  end
end

# Leaf node holding a number token.
class NumberExpression < AbstractExpression
  # Appends BIPUSH followed by the token's numeric value (+@value.n+) to
  # +objectCodeList+ and returns the list.
  def compile(objectCodeList)
    objectCodeList.push(Compiler::BIPUSH, @value.n)
  end
end

# Reads a source file line by line and turns it into a flat list of tokens.
class Scanner
  # Number of lines read so far (incremented once per line in #readLines).
  attr_accessor :current
  # Path of the file being scanned.
  attr_reader :filename

  def initialize(filename)
    @current = 0
    @filename = filename
  end

  # Returns the list of tokens for the whole file.
  def createTokenList
    readLines
  end

  # Tokenizes every non-empty line of the file with S1sTokenizer.
  # Each token is stamped with its position before being collected:
  # +lineNumber+ is the value of +current+ for the line it came from and
  # +indexNumber+ is the index reported by the tokenizer.
  #
  # Returns an Array of tokens in reading order.
  def readLines
    tokenList = []
    File.foreach(@filename) do |raw|
      @current += 1             # count the line even when it is empty
      text = raw.chomp
      next if text.empty?
      S1sTokenizer.new(text).each do |token, index|
        token.lineNumber = @current
        token.indexNumber = index
        tokenList << token
      end
    end
    tokenList
  end
end

# Splits one line of source text into tokens.
class S1sTokenizer
  # The line of text being tokenized.
  attr_reader :line

  def initialize(s)
    @line = s
  end

  # Yields +token, index+ for every token in the line, where +index+ is the
  # character position at which the token starts.
  #
  # * a run of digits becomes a NUMBER token whose value is the run
  #   converted with +to_f+
  # * a run of lowercase letters becomes a KEYWORD token when
  #   TokenUtil.isKeyword? accepts it, otherwise an IDENTIFIER token
  # * spaces and tabs are skipped
  # * any other character becomes a one-character token (see #symbolType)
  #
  # Without a block an Enumerator over the same pairs is returned.
  # With a block the return value is nil.
  def each
    return enum_for(:each) unless block_given?

    index = 0
    while index < @line.size
      ch = @line[index, 1]
      if isdigit?(ch)           # digits
        digits = runFrom(index) { |c| isdigit?(c) }
        yield Token.new(TokenUtil::NUMBER, digits.to_f), index
        index += digits.size
      elsif isletter?(ch)       # 'a' -- 'z'
        word = runFrom(index) { |c| isletter?(c) }
        type = TokenUtil.isKeyword?(word) ? TokenUtil::KEYWORD : TokenUtil::IDENTIFIER
        yield Token.new(type, word), index
        index += word.size
      elsif isspace?(ch)        # space, tab
        index += 1
      else                      # {,},(,....etc
        yield Token.new(symbolType(ch), ch), index
        index += 1
      end
    end
  end

  # True when +ch+ sorts between "0" and "9" (string comparison).
  def isdigit?(ch)
    ch >= "0" && ch <= "9"
  end

  # True when +ch+ sorts between "a" and "z" (string comparison);
  # uppercase letters are not letters for this tokenizer.
  def isletter?(ch)
    ch >= "a" && ch <= "z"
  end

  # True for a space, a tab or the empty string.
  def isspace?(ch)
    ch == " " || ch == "\t" || ch == ""
  end

  private

  # Returns the substring that starts at +start+ and extends over every
  # following character for which the block answers true. The character at
  # +start+ itself is always included (the caller has already classified it).
  def runFrom(start)
    stop = start + 1
    stop += 1 while stop < @line.size && yield(@line[stop, 1])
    @line[start...stop]
  end

  # Maps a punctuation character to its TokenUtil type; characters that are
  # not part of the language map to TokenUtil::ERROR.
  def symbolType(ch)
    case ch
    when "{"      then TokenUtil::L_BRACE
    when "}"      then TokenUtil::R_BRACE
    when "("      then TokenUtil::L_PAREN
    when ")"      then TokenUtil::R_PAREN
    when "+", "-" then TokenUtil::OPE_AS
    when "*", "/" then TokenUtil::OPE_MD
    else               TokenUtil::ERROR
    end
  end
end

# A single lexical token: a type code plus either a text (+s+) or a
# non-text value (+n+), and optional position information.
class Token
  attr_accessor :type, :s, :n
  attr_accessor :lineNumber, :indexNumber

  # t:: the token type code
  # c:: the token content; stored in +s+ when it is exactly a String
  #     (subclasses of String do not count), otherwise stored in +n+
  def initialize(t, c)
    @type = t
    textual = c.instance_of?(String)
    @s = c if textual
    @n = c unless textual
  end
end

# Token type codes and the word lists used to classify text.
class TokenUtil
  ERROR      = 0
  NUMBER     = 1
  OPE_AS     = 2                # + -
  OPE_MD     = 3                # * /
  L_PAREN    = 4                # (
  R_PAREN    = 5                # )
  L_BRACE    = 6                # {
  R_BRACE    = 7                # }
  KEYWORD    = 8                # reserved word
  IDENTIFIER = 9                # identifier

  KEYWORDS = %w[main]
  SYMBOLS  = %w[+ - * / ( ) { }]

  class << self
    # True when +s+ is one of KEYWORDS.
    def isKeyword?(s)
      KEYWORDS.include?(s)
    end

    # True when +s+ is one of SYMBOLS.
    def isSymbol?(s)
      SYMBOLS.include?(s)
    end
  end
end

# When this file is run directly (not required), compile the source file
# named by the first command-line argument.
Compiler.new.compile(ARGV[0]) if __FILE__ == $0

実行結果

mmasa@debian:~/work/ruby/svm$ cat sample.sls
main{
        13*(4 + 12)
}
mmasa@debian:~/work/ruby/svm$ ruby svm1c.rb sample.sls
mmasa@debian:~/work/ruby/svm$ ruby svm1.rb a.svm
208

今回、この題材に取り組んでみて、

と実感しました。