缘起

最近阅读 [云原生分布式存储基石:etcd深入解析] (杜军 , 2019.1)
本系列笔记拟采用golang练习之

raft分布式一致性算法

  1. 分布式存储系统通常会通过维护多个副本来进行容错,
  2. 以提高系统的可用性。
  3. 这就引出了分布式存储系统的核心问题——如何保证多个副本的一致性?
  4. Raft算法把问题分解成了四个子问题:
  5. 1. 领袖选举(leader election)、
  6. 2. 日志复制(log replication)、
  7. 3. 安全性(safety)、
  8. 4. 成员关系变化(membership changes)
  9. 这几个子问题。
  10. 源码gitee地址:
  11. https://gitee.com/ioly/learning.gooop

目标

  • 根据raft协议,实现高可用分布式强一致的kv存储

子目标(Day 10)

  • 添加put/get/del kv键值对的rpc接口
  • 继续完善Leader状态的raft协议响应

设计

  • rpc/IKVStoreRPC: kv操作的rpc接口
  • store/IKVStore: kv操作的持久化接口
  • store/ILogStore: 从IKVStore继承,以支持kv持久化
  • lsm/IRaftState: 继承rpc.IKVStoreRPC接口,以支持kv操作
  • lsm/tLeaderState: 初步实现Leader状态的raft协议处理,事件驱动的逻辑编排,读写分离的字段管理。

rpc/IKVStoreRPC.go

kv操作的rpc接口

  1. package rpc
  // IKVStoreRPC is the RPC surface for key/value operations.
  type IKVStoreRPC interface {
  	// ExecuteKVCmd runs the operation described by cmd and reports the
  	// outcome through ret.
  	ExecuteKVCmd(cmd *KVCmd, ret *KVRet) error
  }

  // KVCmd is the request of a key/value operation: OPCode selects the
  // operation, Key names the entry and Content carries the value to store.
  type KVCmd struct {
  	OPCode  KVOPCode
  	Key     []byte
  	Content []byte
  }

  // KVOPCode selects the operation a KVCmd performs.
  type KVOPCode int

  // Supported operations. Only the first constant spells out the type and
  // iota; the following ones repeat that expression implicitly.
  const (
  	KVGet KVOPCode = iota
  	KVPut
  	KVDel
  )

  // KVRet is the reply of a key/value operation.
  type KVRet struct {
  	Code    KVRetCode
  	Key     []byte
  	Content []byte
  }

  // KVRetCode is the result code carried by a KVRet.
  type KVRetCode int

  // Result codes. KVOk is the zero value of KVRetCode.
  const (
  	KVOk KVRetCode = iota
  	KVKeyNotFound
  	KVInternalError
  )

store/IKVStore.go

kv操作的持久化接口

  1. package store
  // IKVStore is the persistence interface for key/value data.
  type IKVStore interface {
  	// Get looks up key. Note the result order: the error comes first,
  	// the stored content second.
  	Get(key []byte) (err error, content []byte)

  	// Put stores content under key.
  	Put(key []byte, content []byte) error

  	// Del removes key.
  	Del(key []byte) error
  }

store/ILogStore.go

从IKVStore继承,以支持kv持久化

  1. package store
  2. import (
  3. "learning/gooop/etcd/raft/model"
  4. )
  5. type ILogStore interface {
  6. IKVStore
  7. LastAppendedTerm() int64
  8. LastAppendedIndex() int64
  9. LastCommittedTerm() int64
  10. LastCommittedIndex() int64
  11. Append(entry *model.LogEntry) error
  12. Commit(index int64) error
  13. GetLog(index int64) (error, *model.LogEntry)
  14. }

lsm/IRaftState.go

继承rpc.IKVStoreRPC接口,以支持kv操作

  1. package lsm
  2. import (
  3. "learning/gooop/etcd/raft/roles"
  4. "learning/gooop/etcd/raft/rpc"
  5. )
  6. type IRaftState interface {
  7. rpc.IRaftRPC
  8. rpc.IKVStoreRPC
  9. Role() roles.RaftRole
  10. Start()
  11. }

lsm/tLeaderState.go

初步实现Leader状态的raft协议处理,事件驱动的逻辑编排,读写分离的字段管理。

  1. package lsm
  2. import (
  3. "errors"
  4. "learning/gooop/etcd/raft/config"
  5. "learning/gooop/etcd/raft/model"
  6. "learning/gooop/etcd/raft/roles"
  7. "learning/gooop/etcd/raft/rpc"
  8. "learning/gooop/etcd/raft/store"
  9. "learning/gooop/etcd/raft/timeout"
  10. "sync"
  11. "time"
  12. )
  13. // tLeaderState presents a leader node
  14. type tLeaderState struct {
  15. tEventDrivenModel
  16. context iRaftStateContext
  17. mInitOnce sync.Once
  18. mStartOnce sync.Once
  19. // update: leInit / leLeaderHeartbeat
  20. mTerm int64
  21. // update: leInit / leDisposing
  22. mDisposedFlag bool
  23. // update: leVoteToCandidate
  24. mVotedTerm int64
  25. mVotedCandidateID string
  26. mVotedTimestamp int64
  27. }
  // Events raised inside tLeaderState. Each entry lists where it is raised
  // and which arguments accompany it.
  const (
  	// raised by init(); no args
  	leInit = "leader.init"

  	// raised by Start(); no args
  	leStart = "leader.Start"

  	// raised by whenNewLeaderAnnouncedThenSwitchToFollower and
  	// whenHeartbeatRejectedThenSwitchToFollower; no args
  	leDiposing = "leader.Disposing"

  	// raised by Heartbeat() / AppendLog(); args: term int64
  	leNewLeaderAnnounced = "leader.NewLeaderAnnounced"

  	// raised at the top of RequestVote(); args: *rpc.RequestVoteCmd
  	leBeforeRequestVote = "leader.BeforeRequestVote"

  	// raised by RequestVote() when the vote is granted; args: *rpc.RequestVoteCmd
  	leVoteToCandidate = "leader.VoteToCandidate"

  	// raised by handleHeartbeat(); args: term int64
  	leHeartbeatRejected = "leader.HeartbeatRejected"
  )
  49. func newLeaderState(ctx iRaftStateContext, term int64) IRaftState {
  50. it := new(tLeaderState)
  51. it.init(ctx, term)
  52. return it
  53. }
  54. func (me *tLeaderState) init(ctx iRaftStateContext, term int64) {
  55. me.mInitOnce.Do(func() {
  56. me.context = ctx
  57. me.mTerm = term
  58. me.initEventHandlers()
  59. me.raise(leInit)
  60. })
  61. }
  62. func (me *tLeaderState) initEventHandlers() {
  63. // write only logic
  64. me.hookEventsForDisposedFlag()
  65. me.hookEventsForVotedTerm()
  66. // read only logic
  67. me.hook(leStart,
  68. me.whenStartThenBeginHeartbeatToOthers)
  69. me.hook(leNewLeaderAnnounced,
  70. me.whenNewLeaderAnnouncedThenSwitchToFollower)
  71. me.hook(leHeartbeatRejected,
  72. me.whenHeartbeatRejectedThenSwitchToFollower)
  73. }
  74. func (me *tLeaderState) hookEventsForDisposedFlag() {
  75. me.hook(leInit, func(e string, args ...interface{}) {
  76. me.mDisposedFlag = false
  77. })
  78. me.hook(leDiposing, func(e string, args ...interface{}) {
  79. me.mDisposedFlag = true
  80. })
  81. }
  82. func (me *tLeaderState) hookEventsForVotedTerm() {
  83. me.hook(leBeforeRequestVote, func(e string, args ...interface{}) {
  84. // check last vote timeout
  85. if me.mVotedTerm == 0 {
  86. return
  87. }
  88. if time.Duration(time.Now().UnixNano() - me.mVotedTimestamp)*time.Nanosecond >= timeout.ElectionTimeout {
  89. me.mVotedTerm = 0
  90. me.mVotedTimestamp = 0
  91. me.mVotedCandidateID = ""
  92. }
  93. })
  94. me.hook(leVoteToCandidate, func(e string, args ...interface{}) {
  95. // after vote to candidate
  96. cmd := args[0].(*rpc.RequestVoteCmd)
  97. me.mVotedTerm = cmd.Term
  98. me.mVotedCandidateID = cmd.CandidateID
  99. me.mVotedTimestamp = time.Now().UnixNano()
  100. })
  101. }
  102. func (me *tLeaderState) Heartbeat(cmd *rpc.HeartbeatCmd, ret *rpc.HeartbeatRet) error {
  103. // check term
  104. if cmd.Term <= me.mTerm {
  105. ret.Code = rpc.HBTermMismatch
  106. return nil
  107. }
  108. // new leader
  109. me.raise(leNewLeaderAnnounced, cmd.Term)
  110. // return ok
  111. ret.Code = rpc.HBOk
  112. return nil
  113. }
  114. func (me *tLeaderState) AppendLog(cmd *rpc.AppendLogCmd, ret *rpc.AppendLogRet) error {
  115. // check term
  116. if cmd.Term <= me.mTerm {
  117. ret.Code = rpc.ALTermMismatch
  118. return nil
  119. }
  120. // new leader
  121. me.raise(leNewLeaderAnnounced, cmd.Term)
  122. // return ok
  123. ret.Code = rpc.ALInternalError
  124. return nil
  125. }
  126. func (me *tLeaderState) CommitLog(cmd *rpc.CommitLogCmd, ret *rpc.CommitLogRet) error {
  127. // just ignore
  128. ret.Code = rpc.CLInternalError
  129. return nil
  130. }
  131. func (me *tLeaderState) RequestVote(cmd *rpc.RequestVoteCmd, ret *rpc.RequestVoteRet) error {
  132. me.raise(leBeforeRequestVote, cmd)
  133. // check voted term
  134. if cmd.Term < me.mVotedTerm {
  135. ret.Code = rpc.RVTermMismatch
  136. return nil
  137. }
  138. if cmd.Term == me.mVotedTerm {
  139. if me.mVotedCandidateID != "" && me.mVotedCandidateID != cmd.CandidateID {
  140. // already vote another
  141. ret.Code = rpc.RVVotedAnother
  142. return nil
  143. } else {
  144. // already voted
  145. ret.Code = rpc.RVOk
  146. return nil
  147. }
  148. }
  149. if cmd.Term > me.mVotedTerm {
  150. // new term, check log
  151. if cmd.LastLogIndex >= me.context.Store().LastCommittedIndex() {
  152. // good log
  153. me.raise(leVoteToCandidate, cmd)
  154. ret.Code = rpc.RVOk
  155. } else {
  156. // bad log
  157. ret.Code = rpc.RVLogMismatch
  158. }
  159. return nil
  160. }
  161. // should not reach here
  162. ret.Code = rpc.RVTermMismatch
  163. return nil
  164. }
  165. func (me *tLeaderState) Role() roles.RaftRole {
  166. return roles.Leader
  167. }
  168. func (me *tLeaderState) Start() {
  169. me.mStartOnce.Do(func() {
  170. me.raise(leStart)
  171. })
  172. }
  173. func (me *tLeaderState) whenStartThenBeginHeartbeatToOthers(_ string, _ ...interface{}) {
  174. go func() {
  175. for !me.mDisposedFlag {
  176. _ = me.boardcast(func(_ config.IRaftNodeConfig, client rpc.IRaftRPC) error {
  177. return me.handleHeartbeat(client)
  178. })
  179. time.Sleep(timeout.HeartbeatInterval)
  180. }
  181. }()
  182. }
  183. func (me *tLeaderState) boardcast(action func(config.IRaftNodeConfig, rpc.IRaftRPC) error) error {
  184. for _,it := range me.context.Config().Nodes() {
  185. if it.ID() == me.context.Config().ID() {
  186. continue
  187. }
  188. e := me.context.RaftClientService().Using(it.ID(), func(client rpc.IRaftRPC) error {
  189. return action(it, client)
  190. })
  191. if e != nil {
  192. return e
  193. }
  194. }
  195. return nil
  196. }
  197. func (me *tLeaderState) handleHeartbeat(client rpc.IRaftRPC) error {
  198. cmd := new(rpc.HeartbeatCmd)
  199. cmd.Term = me.mTerm
  200. cmd.LeaderID = me.context.Config().ID()
  201. ret := new(rpc.HeartbeatRet)
  202. e := client.Heartbeat(cmd, ret)
  203. if e != nil {
  204. return e
  205. }
  206. switch ret.Code {
  207. case rpc.HBTermMismatch:
  208. me.raise(leHeartbeatRejected, ret.Term)
  209. break
  210. }
  211. return nil
  212. }
  213. func (me *tLeaderState) whenNewLeaderAnnouncedThenSwitchToFollower(_ string, args ...interface{}) {
  214. me.raise(leDiposing)
  215. term := args[0].(int64)
  216. me.context.HandleStateChanged(newFollowerState(me.context, term))
  217. }
  218. func (me *tLeaderState) whenHeartbeatRejectedThenSwitchToFollower(_ string, args ...interface{}) {
  219. me.raise(leDiposing)
  220. term := args[0].(int64)
  221. me.context.HandleStateChanged(newFollowerState(me.context, term))
  222. }
  223. func (me *tLeaderState) ExecuteKVCmd(cmd *rpc.KVCmd, ret *rpc.KVRet) error {
  224. switch cmd.OPCode {
  225. case rpc.KVGet:
  226. return me.handleKVGet(cmd, ret)
  227. case rpc.KVPut:
  228. return me.handleKVPut(cmd, ret)
  229. case rpc.KVDel:
  230. return me.handleKVDel(cmd, ret)
  231. }
  232. return nil
  233. }
  234. func (me *tLeaderState) handleKVGet(cmd *rpc.KVCmd, ret *rpc.KVRet) error {
  235. e, v := me.context.Store().Get(cmd.Key)
  236. if e != nil {
  237. ret.Code = rpc.KVInternalError
  238. return e
  239. }
  240. ret.Code = rpc.KVOk
  241. ret.Content = v
  242. return nil
  243. }
  244. func (me *tLeaderState) handleKVPut(cmd *rpc.KVCmd, ret *rpc.KVRet) error {
  245. kvcmd := new(store.PutCmd)
  246. kvcmd.Key = cmd.Key
  247. kvcmd.Value = cmd.Content
  248. // create/append/commit log
  249. e := me.broadcastKVCmd(kvcmd, ret)
  250. if e != nil {
  251. return e
  252. }
  253. // apply cmd
  254. return me.context.Store().Put(cmd.Key, cmd.Content)
  255. }
  256. func (me *tLeaderState) handleKVDel(cmd *rpc.KVCmd, ret *rpc.KVRet) error {
  257. kvcmd := new(store.DelCmd)
  258. kvcmd.Key = cmd.Key
  259. // create/append/commit log
  260. e := me.broadcastKVCmd(kvcmd, ret)
  261. if e != nil {
  262. return e
  263. }
  264. // apply cmd
  265. return me.context.Store().Put(cmd.Key, cmd.Content)
  266. }
  267. func (me *tLeaderState) broadcastKVCmd(cmd store.IKVCmd, ret *rpc.KVRet) error {
  268. // create log
  269. st := me.context.Store()
  270. log := new(model.LogEntry)
  271. log.Term = me.mTerm
  272. log.Index = st.LastCommittedIndex() + 1
  273. log.PrevTerm = st.LastCommittedTerm()
  274. log.PrevIndex = st.LastCommittedIndex()
  275. log.Command = cmd.Marshal()
  276. // append log
  277. e := st.Append(log)
  278. if e != nil {
  279. ret.Code = rpc.KVInternalError
  280. return e
  281. }
  282. // ask other nodes to append log
  283. alcmd := new(rpc.AppendLogCmd)
  284. alcmd.Term = me.mTerm
  285. alcmd.LeaderID = me.context.Config().ID()
  286. alcmd.Entry = log
  287. sumOk := []int{ 0 }
  288. _ = me.boardcast(func(_ config.IRaftNodeConfig, client rpc.IRaftRPC) error {
  289. alret := new(rpc.AppendLogRet)
  290. e := client.AppendLog(alcmd, alret)
  291. if e != nil {
  292. return e
  293. }
  294. switch alret.Code {
  295. case rpc.ALOk:
  296. sumOk[0]++
  297. break
  298. case rpc.ALTermMismatch:
  299. // todo: fixme
  300. break
  301. case rpc.ALIndexMismatch:
  302. // todo: fixme
  303. break
  304. }
  305. return nil
  306. })
  307. // wait for most nodes
  308. if sumOk[0] >= len(me.context.Config().Nodes()) / 2 {
  309. // commit log
  310. clcmd := new(rpc.CommitLogCmd)
  311. clcmd.LeaderID = me.context.Config().ID()
  312. clcmd.Term = me.mTerm
  313. clcmd.Index = log.Index
  314. _ = me.boardcast(func(_ config.IRaftNodeConfig, client rpc.IRaftRPC) error {
  315. ret := new(rpc.CommitLogRet)
  316. e := client.CommitLog(clcmd, ret)
  317. if e != nil {
  318. return e
  319. }
  320. switch ret.Code {
  321. case rpc.CLInternalError:
  322. // todo: fixme
  323. break
  324. case rpc.CLLogNotFound:
  325. // todo: fixme
  326. break
  327. case rpc.CLOk:
  328. return nil
  329. }
  330. return nil
  331. })
  332. // ok
  333. return nil
  334. } else {
  335. return gErrorCannotReachAgreement
  336. }
  337. }
  338. var gErrorCannotReachAgreement = errors.New("cannot reach agreement")

(未完待续)