缘起
最近阅读<<自制编程语言>>一书
开头一章便是教读者使用lex/yacc工具
制作四则运算器
其中yacc的移进/归约的思想很有启发
于是使用golang练习之
目标
- 制作一个四则运算器, 从os.Stdin读入一行表达式, 然后输出计算过程和结果
- 支持+ - * /
- 支持左右括号
- 支持负数
难点
- 记号扫描(lexer)
- 逐字符扫描记号
- 单字符记号可直接识别, + - * / ( )
- 多字符记号, 此处只有浮点数, 通过有限状态的转换进行识别
+ ‘-‘ = INT_STATUS + ‘\d’ = INT_STATUS + ‘.’ = DOT_STATUS + ‘SPACE | + | - | * | / | ( | )’ = INITIAL + ‘\d’ = FRAG_STATUS + ‘SPACE | + | - | * | / | ( | )’ = INITIAL
- 运算优先级
- /优先级最高, 可以立即归约计算
- 括号次之, 遇到右括号, 应当触发+ -归约
- 程序末尾, 没有新的记号剩余, 对+ -进行归约
- 识别负数
- 简单起见, 本程序总是使用浮点数作为基本计算单位
- 把负号识别为浮点数的可选部分: 浮点数 = -?\d+(.\d+)?
总体流程
- 从os.Stdin读入一行表达式字符串
- 逐字符扫描记号流, 放入记号队列
- 逐记号出队, 置入计算栈
- 判断栈顶是否符合归约条件, 是则进行计算
- 记号队列空, 对计算栈进行最终计算
- 输出结果
main.go
从os.Stdin循环读入行
调用lexer.Parse获得记号流
调用parser.Parse进行计算
func main() {reader := bufio.NewReader(os.Stdin)for {fmt.Printf("=> ")arrBytes, _, err := reader.ReadLine()if err != nil {panic(err.Error())}line := strings.TrimSpace(string(arrBytes))expression := linetokens, e := lexer.Parse(expression)if e != nil {println(e.Error())} else {e,v := parser.Parse(tokens)if e != nil {println(e.Error())}fmt.Println(strconv.FormatFloat(v, 'f', 10, 64))}}}
tokens/tokens.go
定义记号
package tokenstype TOKENS stringconst IntLiteral TOKENS = "INT"const DoubleLiteral TOKENS = "DBL"const ADD TOKENS = "ADD"const SUB TOKENS = "SUB"const MUL TOKENS = "MUL"const DIV TOKENS = "DIV"const LB TOKENS = "LB"const RB TOKENS = "RB"const UNKNOWN TOKENS = "UNKNOWN"type Token struct {Token TOKENSValue stringPosition int}func OfRune(t TOKENS, r rune, from int) *Token {return &Token {Token: t,Value : string(r),Position: from,}}func OfString(t TOKENS, s string, from int) *Token {return &Token {Token: t,Value : s,Position: from,}}
states/states.go
定义lexer的状态
type STATES intconst INITIAL STATES = 1const INT_STATUS STATES = 11const DOT_STATUS STATES = 12const FRAG_STATUS STATES = 13
lexer/lexer.go
记号扫描
type tLexerState struct {state states.STATEStokens []*tokens.Tokenbuffer []runei0 inti1 intd0 intd1 int}func (me *tLexerState) AppendToken(t *tokens.Token) {me.tokens = append(me.tokens, t)}func (me *tLexerState) AppendChar(it rune) {me.buffer = append(me.buffer, it)}func (me *tLexerState) BufferSize() int {return len(me.buffer)}func (me *tLexerState) IntSize() int {return me.i1 - me.i0 + 1}func (me *tLexerState) FragSize() int {return me.d1 - me.d0 + 1}func Parse(line string) ([]*tokens.Token, error) {var state = &(tLexerState{state: states.INITIAL,tokens: make([]*tokens.Token, 0),buffer: make([]rune, 0),i0 : 0,i1 : 0,d0: 0,d1: 0,})for i, it := range line + "\n" {e := parseChar(state, i, it)if e != nil {return nil, e}}return state.tokens, nil}func parseChar(state *tLexerState, i int, it rune) error {var e error = nilswitch state.state {case states.INITIAL:e = parseCharWhenInitial(state, i, it)breakcase states.INT_STATUS:e = parseCharWhenInt(state, i, it)breakcase states.DOT_STATUS:e = parseCharWhenDot(state, i, it)breakcase states.FRAG_STATUS:e = parseCharWhenFrag(state, i, it)break}return e}func parseCharWhenInitial(state *tLexerState, i int, it rune) error {if is_minus(it) || is_0_to_9(it) {state.state = states.INT_STATUSstate.buffer = make([]rune, 0)state.buffer = append(state.buffer, it)state.i0 = istate.i1 = i} else if is_space(it){return nil} else if is_add(it) {state.AppendToken(tokens.OfRune(tokens.ADD, it, i))} else if is_sub(it) {state.AppendToken(tokens.OfRune(tokens.SUB, it, i))} else if is_mul(it) {state.AppendToken(tokens.OfRune(tokens.MUL, it, i))} else if is_div(it) {state.AppendToken(tokens.OfRune(tokens.DIV, it, i))} else if is_lb(it) {state.AppendToken(tokens.OfRune(tokens.LB, it, i))} else if is_rb(it) {state.AppendToken(tokens.OfRune(tokens.RB, it, i))} else {return errors.New(fmt.Sprintf("parseCharWhenInitial, invalid char %c at %d", it, i))}return nil}func parseCharWhenInt(state *tLexerState, i int, it rune) error {if is_0_to_9(it) {if state.BufferSize() >= 10 {return errors.New(fmt.Sprintf("too large int number at %v", i))} else {state.AppendChar(it)state.i1 = i}} else if is_dot(it) {state.AppendChar(it)state.state = states.DOT_STATUS} else if is_space(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.state = states.INITIAL} else if is_rb(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.RB, it, i))state.state = states.INITIAL} else if is_add(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.ADD, it, i))state.state = states.INITIAL} else if is_sub(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.SUB, it, i))state.state = states.INITIAL} else if is_mul(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.MUL, it, i))state.state = states.INITIAL} else if is_div(it) {state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.DIV, it, i))state.state = states.INITIAL} else {return errors.New(fmt.Sprintf("parseCharWhenInt, invalid char %c at %d", it, i))}return nil}func parseCharWhenDot(state *tLexerState, i int, it rune) error {if is_0_to_9(it) {state.state = states.FRAG_STATUSstate.AppendChar(it)state.d0 = istate.d1 = i} else {return errors.New(fmt.Sprintf("parseCharWhenDot, invalid char %c at %d", it, i))}return nil}func parseCharWhenFrag(state *tLexerState, i int, it rune) error {if is_0_to_9(it) {if state.FragSize() >= 10 {return errors.New(fmt.Sprintf("too many chars for a double value at %d", i))} else {state.AppendChar(it)state.d1 = i}} else if is_space(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.state = states.INITIAL} else if is_add(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.ADD, it, i))state.state = states.INITIAL} else if is_sub(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.SUB, it, i))state.state = states.INITIAL} else if is_mul(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.MUL, it, i))state.state = states.INITIAL} else if is_div(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.DIV, it, i))state.state = states.INITIAL} else if is_rb(it) {state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))state.AppendToken(tokens.OfRune(tokens.RB, it, i))state.state = states.INITIAL} else {return errors.New(fmt.Sprintf("parseCharWhenFrag, invalid char %c at %d", it, i))}return nil}
parser/tStackNode.go
定义计算栈的一个节点. 计算栈中有两种节点: 已归约的值节点, 和尚未计算的记号节点
type tStackNodeType intconst NODE_VALUE tStackNodeType = 1const NODE_TOKEN tStackNodeType = 2type tStackNode struct {flag tStackNodeTypetoken *tokens.Tokenvalue float64}func newValueNode(value float64) *tStackNode {return &tStackNode{flag: NODE_VALUE,value: value,token: nil,}}func newTokenNode(token *tokens.Token) *tStackNode {return &tStackNode{flag: NODE_TOKEN,token: token,value: 0,}}func (me *tStackNode) getTokenType() tokens.TOKENS {switch me.flag {case NODE_VALUE:return tokens.DoubleLiteralcase NODE_TOKEN:switch me.token.Token {case tokens.IntLiteral:fallthroughcase tokens.DoubleLiteral:return tokens.DoubleLiteraldefault:return me.token.Token}}return tokens.UNKNOWN}func (me *tStackNode) getDoubleValue() float64 {switch me.flag {case NODE_VALUE:return me.valuecase NODE_TOKEN:switch me.token.Token {case tokens.IntLiteral:fallthroughcase tokens.DoubleLiteral:v1,e1 := strconv.ParseFloat(me.token.Value, 64)if e1 != nil {panic(e1)}return v1}}panic("value not avaiable")}
parser/parser.go
type tParser struct {tokens []*tokens.Tokenstack []*tStackNodetotal intposition int}func newParser(tokens []*tokens.Token) *tParser {return &tParser{tokens: tokens,stack: make([]*tStackNode, 0),total : len(tokens),position: -1,}}func (me *tParser) showStack() string {lst := make([]string, 0)for _,it := range me.stack {switch it.flag {case NODE_VALUE:lst = append(lst, strconv.FormatFloat(it.value, 'f', 10, 64))breakcase NODE_TOKEN:switch it.token.Token {case tokens.DoubleLiteral:fallthroughcase tokens.IntLiteral:lst = append(lst, it.token.Value)breakdefault:lst = append(lst, it.token.Value)break}}}return strings.Join(lst, " ")}func (me *tParser) moreToken() bool {return me.position < me.total - 1}func (me *tParser) nextToken() *tokens.Token {if !me.moreToken() {return nil}me.position++return me.currentToken()}func (me *tParser) currentToken() *tokens.Token {if me.position >= me.total {return nil}return me.tokens[me.position]}func (me *tParser) reduce() {sCurrentStack := ""var fnCheckState = func() {sStackState := me.showStack()if sStackState != sCurrentStack {sCurrentStack = sStackStatefmt.Printf("stack => %s\n", sStackState)}}for {fnCheckState()if me.reduceMulOrDiv() {continue}if me.reduceBrackets() {continue}if !me.moreToken() {if me.reduceAddOrSub() {break}}fnCheckState()//time.Sleep(1*time.Second)break}}func (me *tParser) stackPop() *tStackNode {if me.isStackEmpty() {return nil}var iStackSize = len(me.stack)var last = me.stack[iStackSize - 1]me.stack = me.stack[:(iStackSize-1)]return last}func (me *tParser) stackPopN(n int) []*tStackNode {if me.isStackEmpty() {return nil}var iStackSize = len(me.stack)if iStackSize < n {return nil}var lstTailItems = me.stack[(iStackSize - n):]me.stack = me.stack[:(iStackSize-n)]return lstTailItems}func (me *tParser) stackTakeN(n int) []*tStackNode {if me.isStackEmpty() {return nil}var iStackSize = len(me.stack)if iStackSize < n {return nil}var lstHeadItems = me.stack[:n]me.stack = me.stack[n+1:]return lstHeadItems}func (me *tParser) stackPush(it *tStackNode) {me.stack = append(me.stack, it)}func (me *tParser) reduceMulOrDiv() bool {if me.isStackEmpty() {return false}if me.isStackRMatch(tokens.DoubleLiteral, tokens.MUL, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(3)v1 := lstTailNodes[0].getDoubleValue()v2 := lstTailNodes[2].getDoubleValue()var v = v1*v2me.stackPush(newValueNode(v))return true} else if me.isStackRMatch(tokens.DoubleLiteral, tokens.DIV, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(3)v1 := lstTailNodes[0].getDoubleValue()v2 := lstTailNodes[2].getDoubleValue()if v2 == 0 {panic(fmt.Sprintf("div by zero, %v / %v", v1, v2))}var v = v1/v2me.stackPush(newValueNode(v))return true}return false}func (me *tParser) reduceBrackets() bool {if me.isStackEmpty() {return false}if me.isStackRMatch(tokens.RB) {rb := me.stackPop()var v float64 = 0for {if me.isStackRMatch(tokens.LB, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(2)v += lstTailNodes[1].getDoubleValue()me.stackPush(newValueNode(v))return true} else if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(2)v1 := lstTailNodes[1].getDoubleValue()v = v + v1} else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(2)v1 := lstTailNodes[1].getDoubleValue()v = v - v1} else {panic(fmt.Sprintf("LB not found at %v", rb.token.Position))}}}return false}func (me *tParser) reduceAddOrSub() bool {var v float64 = 0for {if me.isStackEmpty() {break}if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(2)v1 := lstTailNodes[1].getDoubleValue()v = v + v1} else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) {var lstTailNodes = me.stackPopN(2)v1 := lstTailNodes[1].getDoubleValue()v = v - v1} else if len(me.stack)==1 && me.isStackRMatch(tokens.DoubleLiteral) {it := me.stackPop()v += it.getDoubleValue()me.stackPush(newValueNode(v))return true} else {panic("invalid expression")}}return false}func (me *tParser) isStackEmpty() bool {return len(me.stack) == 0}func (me *tParser) isStackRMatch(args...tokens.TOKENS) bool {var iArgsSize = len(args)if iArgsSize <= 0 {return false}var iStackSize = len(me.stack)if iStackSize < iArgsSize {return false}for i,it := range args {var n = iStackSize - iArgsSize + ivar xStackNode = me.stack[n]if it != xStackNode.getTokenType() {return false}}return true}func (me *tParser) isStackLMatch(args...tokens.TOKENS) bool {var iArgsSize = len(args)if iArgsSize <= 0 {return false}var iStackSize = len(me.stack)if iStackSize < iArgsSize {return false}for i,it := range args {var xStackNode = me.stack[i]if it != xStackNode.getTokenType() {return false}}return true}func (me *tParser) parse() (error, float64) {for {t := me.nextToken()if t == nil {break}me.stackPush(newTokenNode(t))me.reduce()}var iStackSize = len(me.stack)if iStackSize == 1 && me.stack[0].getTokenType() == tokens.DoubleLiteral {return nil, me.stack[0].getDoubleValue()}panic(fmt.Sprintf("failed parsing expression %s", me.showStack()))}func Parse(tokens []*tokens.Token) (error, float64) {parser := newParser(tokens)return parser.parse()}
输出
=> 1+2*(3-4*(5/6+(7-8*9)))stack => 1stack => 1 +stack => 1 + 2stack => 1 + 2 *stack => 1 + 2 * (stack => 1 + 2 * ( 3stack => 1 + 2 * ( 3 -stack => 1 + 2 * ( 3 - 4stack => 1 + 2 * ( 3 - 4 *stack => 1 + 2 * ( 3 - 4 * (stack => 1 + 2 * ( 3 - 4 * ( 5stack => 1 + 2 * ( 3 - 4 * ( 5 /stack => 1 + 2 * ( 3 - 4 * ( 5 / 6stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 +stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + (stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 -stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 *stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 * 9stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000 )stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000 )stack => 1 + 2 * ( 3 - 4 * -64.1666666667stack => 1 + 2 * ( 3 - -256.6666666667stack => 1 + 2 * ( 3 - -256.6666666667 )stack => 1 + 2 * 259.6666666667stack => 1 + 519.3333333333520.3333333333=>
