缘起
最近阅读<<我的第一本算法书>>(【日】石田保辉;宫崎修一)
本系列笔记拟采用golang练习之
堆排序
堆排序的特点是利用了数据结构中的堆。首先,在堆中存储所有的数据,并按降序来构建堆。为了排序,需要再从堆中把数据一个个取出来。堆排序一开始需要将n个数据存进堆里,所需时间为O(nlogn)。每轮取出最大的数据并重构堆所需要的时间为O(logn)。由于总共有n轮,所以重构后排序的时间也是O(nlog n)。因此,整体来看堆排序的时间复杂度为O(nlog n)。这样来看,堆排序的运行时间比之前讲到的冒泡排序、选择排序、插入排序的时间O(n^2)都要短。摘自 <<我的第一本算法书>> 【日】石田保辉;宫崎修一
目标
- 构造一个堆, 并测试堆排序的效率
设计
- IHeap: 定义堆的接口
- tArrayHeap
- 基于数组的堆的实现.
- 堆是一种特殊的二叉完全树: 父节点总是小于任意的子节点
- 堆的父子节点的索引存在线性关系, 以0下标为例
- parent.index = (node.index - 1) / 2
- leftChild.index = node.index*2 + 1
- rightChild.index = leftChild.index + 1
- ISorter:
- 定义排序接口
- 定义值比较函数以兼容任意值类型
- 通过调整比较函数可实现倒序输出
- tHeapSort
- 利用tArrayHeap进行堆排序
- 先把整个数组push进heap
- 然后逐个pop出来, 即可
单元测试
heap_sort_test.go, 测试过程与之前的冒泡, 选择, 插入等类似, 但测试数组扩大到10万元素.
package sortingimport ("fmt""learning/gooop/sorting""learning/gooop/sorting/heap_sort""math/rand""testing""time")func Test_HeapSort(t *testing.T) {fnAssertTrue := func(b bool, msg string) {if !b {t.Fatal(msg)}}reversed := falsefnCompare := func(a interface{}, b interface{}) sorting.CompareResult {i1 := a.(int)i2 := b.(int)if i1 < i2 {if reversed {return sorting.GREATER} else {return sorting.LESS}} else if i1 == i2 {return sorting.EQUAL} else {if reversed {return sorting.LESS} else {return sorting.GREATER}}}fnTestSorter := func(sorter sorting.ISorter) {reversed = false// test simple arraysamples := []interface{} { 2,3,1,5,4,7,6 }sorter.Sort(samples, fnCompare)fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]", "expecting 1,2,3,4,5,6,7")t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")// test 10000 items sortingrnd := rand.New(rand.NewSource(time.Now().UnixNano()))sampleCount := 100*1000t.Logf("prepare large array with %v items", sampleCount)samples = make([]interface{}, sampleCount)for i := 0;i < sampleCount;i++ {samples[i] = rnd.Intn(sampleCount*10)}t.Logf("sorting large array with %v items", sampleCount)t0 := time.Now().UnixNano()sorter.Sort(samples, fnCompare)cost := time.Now().UnixNano() - t0for i := 1;i < sampleCount;i++ {fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")}t.Logf("end sorting large array, cost = %v ms", cost / 1000000)// test 0-20sampleCount = 20t.Log("sorting 0-20")samples = make([]interface{}, sampleCount)for i := 0;i < sampleCount;i++ {for {p := rnd.Intn(sampleCount)if samples[p] == nil {samples[p] = ibreak}}}t.Logf("unsort = %v", samples)sorter.Sort(samples, fnCompare)t.Logf("sorted = %v", samples)fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")t.Log("pass sorting 0-20")// test specialsamples = []interface{} {}sorter.Sort(samples, fnCompare)t.Log("pass sorting []")samples = []interface{} { 1 }sorter.Sort(samples, fnCompare)t.Log("pass sorting [1]")samples = []interface{} { 3,1 }sorter.Sort(samples, fnCompare)fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]", "expecting 1,3")t.Log("pass sorting [1 3]")reversed = truesamples = []interface{} { 2, 3,1 }sorter.Sort(samples, fnCompare)fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]", "expecting 3,2,1")t.Log("pass sorting [3 2 1]")}t.Log("\ntesting HeapSort")fnTestSorter(heap_sort.HeapSort)}
测试输出
10万元素排序只需数十毫秒,
确实比冒泡, 选择, 插入等排序有指数级的提升.
代价是多占用了一个堆的空间.
$ go test -v heap_sort_test.go=== RUN Test_HeapSortheap_sort_test.go:109:testing HeapSortheap_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]heap_sort_test.go:53: prepare large array with 100000 itemsheap_sort_test.go:59: sorting large array with 100000 itemsheap_sort_test.go:66: end sorting large array, cost = 67 msheap_sort_test.go:70: sorting 0-20heap_sort_test.go:81: unsort = [4 15 1 13 19 14 11 12 9 8 3 7 16 18 2 10 17 6 0 5]heap_sort_test.go:84: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]heap_sort_test.go:86: pass sorting 0-20heap_sort_test.go:91: pass sorting []heap_sort_test.go:95: pass sorting [1]heap_sort_test.go:100: pass sorting [1 3]heap_sort_test.go:106: pass sorting [3 2 1]--- PASS: Test_HeapSort (0.07s)PASSok command-line-arguments 0.076s
IHeap.go
定义堆的接口
package heap_sorttype IHeap interface {Size() intIsEmpty() boolIsNotEmpty() boolPush(value interface{})Pop() (error, interface{})}
tArrayHeap
- 基于数组的堆的实现.
- 堆是一种特殊的二叉完全树: 父节点总是小于任意的子节点
- 堆的父子节点的索引存在线性关系, 以0下标为例
- parent.index = (node.index - 1) / 2
- leftChild.index = node.index*2 + 1
- rightChild.index = leftChild.index + 1 ```go package heap_sort
import ( “errors” “learning/gooop/sorting” )
type tArrayHeap struct { comparator sorting.CompareFunction items []interface{} size int version int64 }
func newArrayHeap(comparator sorting.CompareFunction) IHeap { return &tArrayHeap{ comparator: comparator, items: make([]interface{}, 0), size: 0, version: 0, } }
func (me *tArrayHeap) Size() int { return me.size }
func (me *tArrayHeap) IsEmpty() bool { return me.size <= 0 }
func (me *tArrayHeap) IsNotEmpty() bool { return !me.IsEmpty() }
func (me *tArrayHeap) Push(value interface{}) { me.version++
me.ensureSize(me.size + 1)me.items[me.size] = valueme.size++me.shiftUp(me.size - 1)me.version++
}
func (me *tArrayHeap) ensureSize(size int) { for ;len(me.items) < size; { me.items = append(me.items, nil) } }
func (me *tArrayHeap) parentOf(i int) int { return (i - 1) / 2 }
func (me tArrayHeap) leftChildOf(i int) int { return i2 + 1 }
func (me *tArrayHeap) rightChildOf(i int) int { return me.leftChildOf(i) + 1 }
func (me *tArrayHeap) last() (i int, v interface{}) { if me.IsEmpty() { return -1, nil }
i = me.size - 1v = me.items[i]return i,v
}
func (me *tArrayHeap) shiftUp(i int) { if i <= 0 { return } v := me.items[i]
pi := me.parentOf(i)pv := me.items[pi]if me.comparator(v, pv) == sorting.LESS {me.items[pi], me.items[i] = v, pvme.shiftUp(pi)}
}
func (me *tArrayHeap) Pop() (error, interface{}) { if me.IsEmpty() { return gNoMoreElementsError, nil }
me.version++top := me.items[0]li, lv := me.last()me.items[0] = nilme.size--if me.IsEmpty() {return nil, top}me.items[0] = lvme.items[li] = nilme.shiftDown(0)me.version++return nil, top
}
func (me *tArrayHeap) shiftDown(i int) { pv := me.items[i] ok, ci, cv := me.minChildOf(i) if ok && me.comparator(cv, pv) == sorting.LESS { me.items[i], me.items[ci] = cv, pv me.shiftDown(ci) } }
func (me *tArrayHeap) minChildOf(p int) (ok bool, i int, v interface{}) { li := me.leftChildOf(p) if li >= me.size { return false, 0, nil } lv := me.items[li]
ri := me.rightChildOf(p)if ri >= me.size {return true, li, lv}rv := me.items[ri]if me.comparator(lv, rv) == sorting.LESS {return true, li, lv} else {return true, ri, rv}
}
var gNoMoreElementsError = errors.New(“no more elements”)
<a name="To6Sq"></a># ISorter- 定义排序接口- 定义值比较函数以兼容任意值类型- 通过调整比较函数可实现倒序输出```gopackage sortingtype ISorter interface {Sort(data []interface{}, comparator CompareFunction) []interface{}}type CompareFunction func(a interface{}, b interface{}) CompareResulttype CompareResult intconst LESS CompareResult = -1const EQUAL CompareResult = 0const GREATER CompareResult = 1
tHeapSort
- 堆排序器, 实现ISorter接口
- 利用tArrayHeap进行堆排序
- 先把整个数组push进heap
- 然后逐个pop出来, 即可 ```go package heap_sort
import “learning/gooop/sorting”
type tHeapSort struct { }
func newHeapSort() sorting.ISorter { return &tHeapSort{} }
func (me *tHeapSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} { heap := newArrayHeap(comparator) for _,it := range data { heap.Push(it) }
for i,_ := range data {e,v := heap.Pop()if e == nil {data[i] = v} else {panic(e)}}return data
}
var HeapSort = newHeapSort() ```
(end)
