缘起

最近阅读《自制编程语言》一书
开头一章便是教读者使用lex/yacc工具
制作四则运算器
其中yacc的移进/归约的思想很有启发
于是使用golang练习之

目标

  • 制作一个四则运算器, 从os.Stdin读入一行表达式, 然后输出计算过程和结果
  • 支持+ - * /
  • 支持左右括号
  • 支持负数

难点

  • 记号扫描(lexer)
    • 逐字符扫描记号
    • 单字符记号可直接识别, + - * / ( )
    • 多字符记号, 此处只有浮点数, 通过有限状态的转换进行识别
      • INITIAL + ‘-’ = INT_STATUS
      • INITIAL + ‘\d’ = INT_STATUS
      • INT_STATUS + ‘.’ = DOT_STATUS
      • INT_STATUS + ‘SPACE | + | - | * | / | ( | )’ = INITIAL
      • DOT_STATUS + ‘\d’ = FRAG_STATUS
      • FRAG_STATUS + ‘SPACE | + | - | * | / | ( | )’ = INITIAL
  • 运算优先级
    • * /优先级最高, 可以立即归约计算
    • 括号次之, 遇到右括号, 应当触发+ -归约
    • 程序末尾, 没有新的记号剩余, 对+ -进行归约
  • 识别负数
    • 简单起见, 本程序总是使用浮点数作为基本计算单位
    • 把负号识别为浮点数的可选部分: 浮点数 = -?\d+(\.\d+)?

总体流程

  • 从os.Stdin读入一行表达式字符串
  • 逐字符扫描记号流, 放入记号队列
  • 逐记号出队, 置入计算栈
  • 判断栈顶是否符合归约条件, 是则进行计算
  • 记号队列空, 对计算栈进行最终计算
  • 输出结果

main.go

从os.Stdin循环读入行
调用lexer.Parse获得记号流
调用parser.Parse进行计算

  1. func main() {
  2. reader := bufio.NewReader(os.Stdin)
  3. for {
  4. fmt.Printf("=> ")
  5. arrBytes, _, err := reader.ReadLine()
  6. if err != nil {
  7. panic(err.Error())
  8. }
  9. line := strings.TrimSpace(string(arrBytes))
  10. expression := line
  11. tokens, e := lexer.Parse(expression)
  12. if e != nil {
  13. println(e.Error())
  14. } else {
  15. e,v := parser.Parse(tokens)
  16. if e != nil {
  17. println(e.Error())
  18. }
  19. fmt.Println(strconv.FormatFloat(v, 'f', 10, 64))
  20. }
  21. }
  22. }

tokens/tokens.go

定义记号

  1. package tokens
  2. type TOKENS string
  3. const IntLiteral TOKENS = "INT"
  4. const DoubleLiteral TOKENS = "DBL"
  5. const ADD TOKENS = "ADD"
  6. const SUB TOKENS = "SUB"
  7. const MUL TOKENS = "MUL"
  8. const DIV TOKENS = "DIV"
  9. const LB TOKENS = "LB"
  10. const RB TOKENS = "RB"
  11. const UNKNOWN TOKENS = "UNKNOWN"
  12. type Token struct {
  13. Token TOKENS
  14. Value string
  15. Position int
  16. }
  17. func OfRune(t TOKENS, r rune, from int) *Token {
  18. return &Token {
  19. Token: t,
  20. Value : string(r),
  21. Position: from,
  22. }
  23. }
  24. func OfString(t TOKENS, s string, from int) *Token {
  25. return &Token {
  26. Token: t,
  27. Value : s,
  28. Position: from,
  29. }
  30. }

states/states.go

定义lexer的状态

  1. type STATES int
  2. const INITIAL STATES = 1
  3. const INT_STATUS STATES = 11
  4. const DOT_STATUS STATES = 12
  5. const FRAG_STATUS STATES = 13

lexer/lexer.go

记号扫描

  1. type tLexerState struct {
  2. state states.STATES
  3. tokens []*tokens.Token
  4. buffer []rune
  5. i0 int
  6. i1 int
  7. d0 int
  8. d1 int
  9. }
  10. func (me *tLexerState) AppendToken(t *tokens.Token) {
  11. me.tokens = append(me.tokens, t)
  12. }
  13. func (me *tLexerState) AppendChar(it rune) {
  14. me.buffer = append(me.buffer, it)
  15. }
  16. func (me *tLexerState) BufferSize() int {
  17. return len(me.buffer)
  18. }
  19. func (me *tLexerState) IntSize() int {
  20. return me.i1 - me.i0 + 1
  21. }
  22. func (me *tLexerState) FragSize() int {
  23. return me.d1 - me.d0 + 1
  24. }
  25. func Parse(line string) ([]*tokens.Token, error) {
  26. var state = &(tLexerState{
  27. state: states.INITIAL,
  28. tokens: make([]*tokens.Token, 0),
  29. buffer: make([]rune, 0),
  30. i0 : 0,
  31. i1 : 0,
  32. d0: 0,
  33. d1: 0,
  34. })
  35. for i, it := range line + "\n" {
  36. e := parseChar(state, i, it)
  37. if e != nil {
  38. return nil, e
  39. }
  40. }
  41. return state.tokens, nil
  42. }
  43. func parseChar(state *tLexerState, i int, it rune) error {
  44. var e error = nil
  45. switch state.state {
  46. case states.INITIAL:
  47. e = parseCharWhenInitial(state, i, it)
  48. break
  49. case states.INT_STATUS:
  50. e = parseCharWhenInt(state, i, it)
  51. break
  52. case states.DOT_STATUS:
  53. e = parseCharWhenDot(state, i, it)
  54. break
  55. case states.FRAG_STATUS:
  56. e = parseCharWhenFrag(state, i, it)
  57. break
  58. }
  59. return e
  60. }
  61. func parseCharWhenInitial(state *tLexerState, i int, it rune) error {
  62. if is_minus(it) || is_0_to_9(it) {
  63. state.state = states.INT_STATUS
  64. state.buffer = make([]rune, 0)
  65. state.buffer = append(state.buffer, it)
  66. state.i0 = i
  67. state.i1 = i
  68. } else if is_space(it){
  69. return nil
  70. } else if is_add(it) {
  71. state.AppendToken(tokens.OfRune(tokens.ADD, it, i))
  72. } else if is_sub(it) {
  73. state.AppendToken(tokens.OfRune(tokens.SUB, it, i))
  74. } else if is_mul(it) {
  75. state.AppendToken(tokens.OfRune(tokens.MUL, it, i))
  76. } else if is_div(it) {
  77. state.AppendToken(tokens.OfRune(tokens.DIV, it, i))
  78. } else if is_lb(it) {
  79. state.AppendToken(tokens.OfRune(tokens.LB, it, i))
  80. } else if is_rb(it) {
  81. state.AppendToken(tokens.OfRune(tokens.RB, it, i))
  82. } else {
  83. return errors.New(fmt.Sprintf("parseCharWhenInitial, invalid char %c at %d", it, i))
  84. }
  85. return nil
  86. }
  87. func parseCharWhenInt(state *tLexerState, i int, it rune) error {
  88. if is_0_to_9(it) {
  89. if state.BufferSize() >= 10 {
  90. return errors.New(fmt.Sprintf("too large int number at %v", i))
  91. } else {
  92. state.AppendChar(it)
  93. state.i1 = i
  94. }
  95. } else if is_dot(it) {
  96. state.AppendChar(it)
  97. state.state = states.DOT_STATUS
  98. } else if is_space(it) {
  99. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  100. state.state = states.INITIAL
  101. } else if is_rb(it) {
  102. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  103. state.AppendToken(tokens.OfRune(tokens.RB, it, i))
  104. state.state = states.INITIAL
  105. } else if is_add(it) {
  106. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  107. state.AppendToken(tokens.OfRune(tokens.ADD, it, i))
  108. state.state = states.INITIAL
  109. } else if is_sub(it) {
  110. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  111. state.AppendToken(tokens.OfRune(tokens.SUB, it, i))
  112. state.state = states.INITIAL
  113. } else if is_mul(it) {
  114. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  115. state.AppendToken(tokens.OfRune(tokens.MUL, it, i))
  116. state.state = states.INITIAL
  117. } else if is_div(it) {
  118. state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1))
  119. state.AppendToken(tokens.OfRune(tokens.DIV, it, i))
  120. state.state = states.INITIAL
  121. } else {
  122. return errors.New(fmt.Sprintf("parseCharWhenInt, invalid char %c at %d", it, i))
  123. }
  124. return nil
  125. }
  126. func parseCharWhenDot(state *tLexerState, i int, it rune) error {
  127. if is_0_to_9(it) {
  128. state.state = states.FRAG_STATUS
  129. state.AppendChar(it)
  130. state.d0 = i
  131. state.d1 = i
  132. } else {
  133. return errors.New(fmt.Sprintf("parseCharWhenDot, invalid char %c at %d", it, i))
  134. }
  135. return nil
  136. }
  137. func parseCharWhenFrag(state *tLexerState, i int, it rune) error {
  138. if is_0_to_9(it) {
  139. if state.FragSize() >= 10 {
  140. return errors.New(fmt.Sprintf("too many chars for a double value at %d", i))
  141. } else {
  142. state.AppendChar(it)
  143. state.d1 = i
  144. }
  145. } else if is_space(it) {
  146. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  147. state.state = states.INITIAL
  148. } else if is_add(it) {
  149. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  150. state.AppendToken(tokens.OfRune(tokens.ADD, it, i))
  151. state.state = states.INITIAL
  152. } else if is_sub(it) {
  153. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  154. state.AppendToken(tokens.OfRune(tokens.SUB, it, i))
  155. state.state = states.INITIAL
  156. } else if is_mul(it) {
  157. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  158. state.AppendToken(tokens.OfRune(tokens.MUL, it, i))
  159. state.state = states.INITIAL
  160. } else if is_div(it) {
  161. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  162. state.AppendToken(tokens.OfRune(tokens.DIV, it, i))
  163. state.state = states.INITIAL
  164. } else if is_rb(it) {
  165. state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1))
  166. state.AppendToken(tokens.OfRune(tokens.RB, it, i))
  167. state.state = states.INITIAL
  168. } else {
  169. return errors.New(fmt.Sprintf("parseCharWhenFrag, invalid char %c at %d", it, i))
  170. }
  171. return nil
  172. }

parser/tStackNode.go

定义计算栈的一个节点. 计算栈中有两种节点: 已归约的值节点, 和尚未计算的记号节点

  1. type tStackNodeType int
  2. const NODE_VALUE tStackNodeType = 1
  3. const NODE_TOKEN tStackNodeType = 2
  4. type tStackNode struct {
  5. flag tStackNodeType
  6. token *tokens.Token
  7. value float64
  8. }
  9. func newValueNode(value float64) *tStackNode {
  10. return &tStackNode{
  11. flag: NODE_VALUE,
  12. value: value,
  13. token: nil,
  14. }
  15. }
  16. func newTokenNode(token *tokens.Token) *tStackNode {
  17. return &tStackNode{
  18. flag: NODE_TOKEN,
  19. token: token,
  20. value: 0,
  21. }
  22. }
  23. func (me *tStackNode) getTokenType() tokens.TOKENS {
  24. switch me.flag {
  25. case NODE_VALUE:
  26. return tokens.DoubleLiteral
  27. case NODE_TOKEN:
  28. switch me.token.Token {
  29. case tokens.IntLiteral:
  30. fallthrough
  31. case tokens.DoubleLiteral:
  32. return tokens.DoubleLiteral
  33. default:
  34. return me.token.Token
  35. }
  36. }
  37. return tokens.UNKNOWN
  38. }
  39. func (me *tStackNode) getDoubleValue() float64 {
  40. switch me.flag {
  41. case NODE_VALUE:
  42. return me.value
  43. case NODE_TOKEN:
  44. switch me.token.Token {
  45. case tokens.IntLiteral:
  46. fallthrough
  47. case tokens.DoubleLiteral:
  48. v1,e1 := strconv.ParseFloat(me.token.Value, 64)
  49. if e1 != nil {
  50. panic(e1)
  51. }
  52. return v1
  53. }
  54. }
  55. panic("value not avaiable")
  56. }

parser/parser.go

  1. type tParser struct {
  2. tokens []*tokens.Token
  3. stack []*tStackNode
  4. total int
  5. position int
  6. }
  7. func newParser(tokens []*tokens.Token) *tParser {
  8. return &tParser{
  9. tokens: tokens,
  10. stack: make([]*tStackNode, 0),
  11. total : len(tokens),
  12. position: -1,
  13. }
  14. }
  15. func (me *tParser) showStack() string {
  16. lst := make([]string, 0)
  17. for _,it := range me.stack {
  18. switch it.flag {
  19. case NODE_VALUE:
  20. lst = append(lst, strconv.FormatFloat(it.value, 'f', 10, 64))
  21. break
  22. case NODE_TOKEN:
  23. switch it.token.Token {
  24. case tokens.DoubleLiteral:
  25. fallthrough
  26. case tokens.IntLiteral:
  27. lst = append(lst, it.token.Value)
  28. break
  29. default:
  30. lst = append(lst, it.token.Value)
  31. break
  32. }
  33. }
  34. }
  35. return strings.Join(lst, " ")
  36. }
  37. func (me *tParser) moreToken() bool {
  38. return me.position < me.total - 1
  39. }
  40. func (me *tParser) nextToken() *tokens.Token {
  41. if !me.moreToken() {
  42. return nil
  43. }
  44. me.position++
  45. return me.currentToken()
  46. }
  47. func (me *tParser) currentToken() *tokens.Token {
  48. if me.position >= me.total {
  49. return nil
  50. }
  51. return me.tokens[me.position]
  52. }
// reduce is called after every shift. It keeps applying reductions to
// the stack top until none fires, printing the stack whenever its
// rendering changes. The order mirrors operator precedence:
//  1. "*" and "/" reduce immediately,
//  2. a closing bracket collapses its whole bracketed sub-expression,
//  3. only when no input tokens remain is the final "+/-" chain folded.
func (me *tParser) reduce() {
	sCurrentStack := ""
	// fnCheckState prints the stack, but only when it changed since the
	// previous print, keeping the trace free of duplicate lines.
	var fnCheckState = func() {
		sStackState := me.showStack()
		if sStackState != sCurrentStack {
			sCurrentStack = sStackState
			fmt.Printf("stack => %s\n", sStackState)
		}
	}
	for {
		fnCheckState()
		if me.reduceMulOrDiv() {
			continue
		}
		if me.reduceBrackets() {
			continue
		}
		if !me.moreToken() {
			// End of input: fold the remaining "+ -" chain; whether or
			// not it succeeds, the loop terminates below.
			if me.reduceAddOrSub() {
				break
			}
		}
		fnCheckState()
		break
	}
}
  80. func (me *tParser) stackPop() *tStackNode {
  81. if me.isStackEmpty() {
  82. return nil
  83. }
  84. var iStackSize = len(me.stack)
  85. var last = me.stack[iStackSize - 1]
  86. me.stack = me.stack[:(iStackSize-1)]
  87. return last
  88. }
  89. func (me *tParser) stackPopN(n int) []*tStackNode {
  90. if me.isStackEmpty() {
  91. return nil
  92. }
  93. var iStackSize = len(me.stack)
  94. if iStackSize < n {
  95. return nil
  96. }
  97. var lstTailItems = me.stack[(iStackSize - n):]
  98. me.stack = me.stack[:(iStackSize-n)]
  99. return lstTailItems
  100. }
  101. func (me *tParser) stackTakeN(n int) []*tStackNode {
  102. if me.isStackEmpty() {
  103. return nil
  104. }
  105. var iStackSize = len(me.stack)
  106. if iStackSize < n {
  107. return nil
  108. }
  109. var lstHeadItems = me.stack[:n]
  110. me.stack = me.stack[n+1:]
  111. return lstHeadItems
  112. }
  113. func (me *tParser) stackPush(it *tStackNode) {
  114. me.stack = append(me.stack, it)
  115. }
  116. func (me *tParser) reduceMulOrDiv() bool {
  117. if me.isStackEmpty() {
  118. return false
  119. }
  120. if me.isStackRMatch(tokens.DoubleLiteral, tokens.MUL, tokens.DoubleLiteral) {
  121. var lstTailNodes = me.stackPopN(3)
  122. v1 := lstTailNodes[0].getDoubleValue()
  123. v2 := lstTailNodes[2].getDoubleValue()
  124. var v = v1*v2
  125. me.stackPush(newValueNode(v))
  126. return true
  127. } else if me.isStackRMatch(tokens.DoubleLiteral, tokens.DIV, tokens.DoubleLiteral) {
  128. var lstTailNodes = me.stackPopN(3)
  129. v1 := lstTailNodes[0].getDoubleValue()
  130. v2 := lstTailNodes[2].getDoubleValue()
  131. if v2 == 0 {
  132. panic(fmt.Sprintf("div by zero, %v / %v", v1, v2))
  133. }
  134. var v = v1/v2
  135. me.stackPush(newValueNode(v))
  136. return true
  137. }
  138. return false
  139. }
// reduceBrackets fires when the stack top is ")": it pops the ")",
// then walks down the stack folding "+ value" / "- value" pairs into
// an accumulator until it reaches "( value", which it replaces
// (together with the accumulated sum) by a single value node.
// Folding right-to-left is sound because each pair applies its own
// sign: for "( 3 - 4 + 5" the steps are v=5, v=5-4, then v=v+3.
// Returns true when a bracketed group was reduced; panics when the
// matching "(" cannot be found.
func (me *tParser) reduceBrackets() bool {
	if me.isStackEmpty() {
		return false
	}
	if me.isStackRMatch(tokens.RB) {
		rb := me.stackPop()
		var v float64 = 0
		for {
			if me.isStackRMatch(tokens.LB, tokens.DoubleLiteral) {
				// Reached "( value": fold it in and replace the whole
				// group with one value node.
				var lstTailNodes = me.stackPopN(2)
				v += lstTailNodes[1].getDoubleValue()
				me.stackPush(newValueNode(v))
				return true
			} else if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) {
				var lstTailNodes = me.stackPopN(2)
				v1 := lstTailNodes[1].getDoubleValue()
				v = v + v1
			} else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) {
				var lstTailNodes = me.stackPopN(2)
				v1 := lstTailNodes[1].getDoubleValue()
				v = v - v1
			} else {
				panic(fmt.Sprintf("LB not found at %v", rb.token.Position))
			}
		}
	}
	return false
}
// reduceAddOrSub folds the entire remaining "+ value" / "- value"
// chain; reduce() calls it only once no input tokens are left. It
// walks from the stack top applying each operator's sign to the
// accumulator, and when a single leading value remains it adds that in
// and pushes the final result as one value node. Returns true on
// success, false when the stack was empty to begin with; panics when
// the stack is not a pure "+/-" chain.
func (me *tParser) reduceAddOrSub() bool {
	var v float64 = 0
	for {
		if me.isStackEmpty() {
			break
		}
		if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) {
			var lstTailNodes = me.stackPopN(2)
			v1 := lstTailNodes[1].getDoubleValue()
			v = v + v1
		} else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) {
			var lstTailNodes = me.stackPopN(2)
			v1 := lstTailNodes[1].getDoubleValue()
			v = v - v1
		} else if len(me.stack) == 1 && me.isStackRMatch(tokens.DoubleLiteral) {
			// Only the leading value remains: fold it in and push the
			// final result.
			it := me.stackPop()
			v += it.getDoubleValue()
			me.stackPush(newValueNode(v))
			return true
		} else {
			panic("invalid expression")
		}
	}
	return false
}
  193. func (me *tParser) isStackEmpty() bool {
  194. return len(me.stack) == 0
  195. }
  196. func (me *tParser) isStackRMatch(args...tokens.TOKENS) bool {
  197. var iArgsSize = len(args)
  198. if iArgsSize <= 0 {
  199. return false
  200. }
  201. var iStackSize = len(me.stack)
  202. if iStackSize < iArgsSize {
  203. return false
  204. }
  205. for i,it := range args {
  206. var n = iStackSize - iArgsSize + i
  207. var xStackNode = me.stack[n]
  208. if it != xStackNode.getTokenType() {
  209. return false
  210. }
  211. }
  212. return true
  213. }
  214. func (me *tParser) isStackLMatch(args...tokens.TOKENS) bool {
  215. var iArgsSize = len(args)
  216. if iArgsSize <= 0 {
  217. return false
  218. }
  219. var iStackSize = len(me.stack)
  220. if iStackSize < iArgsSize {
  221. return false
  222. }
  223. for i,it := range args {
  224. var xStackNode = me.stack[i]
  225. if it != xStackNode.getTokenType() {
  226. return false
  227. }
  228. }
  229. return true
  230. }
  231. func (me *tParser) parse() (error, float64) {
  232. for {
  233. t := me.nextToken()
  234. if t == nil {
  235. break
  236. }
  237. me.stackPush(newTokenNode(t))
  238. me.reduce()
  239. }
  240. var iStackSize = len(me.stack)
  241. if iStackSize == 1 && me.stack[0].getTokenType() == tokens.DoubleLiteral {
  242. return nil, me.stack[0].getDoubleValue()
  243. }
  244. panic(fmt.Sprintf("failed parsing expression %s", me.showStack()))
  245. }
  246. func Parse(tokens []*tokens.Token) (error, float64) {
  247. parser := newParser(tokens)
  248. return parser.parse()
  249. }

输出

  1. => 1+2*(3-4*(5/6+(7-8*9)))
  2. stack => 1
  3. stack => 1 +
  4. stack => 1 + 2
  5. stack => 1 + 2 *
  6. stack => 1 + 2 * (
  7. stack => 1 + 2 * ( 3
  8. stack => 1 + 2 * ( 3 -
  9. stack => 1 + 2 * ( 3 - 4
  10. stack => 1 + 2 * ( 3 - 4 *
  11. stack => 1 + 2 * ( 3 - 4 * (
  12. stack => 1 + 2 * ( 3 - 4 * ( 5
  13. stack => 1 + 2 * ( 3 - 4 * ( 5 /
  14. stack => 1 + 2 * ( 3 - 4 * ( 5 / 6
  15. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333
  16. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 +
  17. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + (
  18. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7
  19. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 -
  20. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8
  21. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 *
  22. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 * 9
  23. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000
  24. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000 )
  25. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000
  26. stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000 )
  27. stack => 1 + 2 * ( 3 - 4 * -64.1666666667
  28. stack => 1 + 2 * ( 3 - -256.6666666667
  29. stack => 1 + 2 * ( 3 - -256.6666666667 )
  30. stack => 1 + 2 * 259.6666666667
  31. stack => 1 + 519.3333333333
  32. 520.3333333333
  33. =>