map里面的key的hash是怎么实现的
源码:src/runtime/map.go
golang的map是内置关键字,不管是get还是set都需要通过key的hash找到对应的存储实体。具体的hash过程如下代码:
type maptype struct {typ _typekey *_typeelem *_typebucket *_type // internal type representing a hash bucketkeysize uint8 // size of key slotelemsize uint8 // size of elem slotbucketsize uint16 // size of bucketflags uint32}//t *maptypealg := t.key.alghash := alg.hash(key, uintptr(h.hash0))
这里其实就是拿到key对应的类型,然后获取当前key的类型的hash算法。然后调用hash函数。
hash函数的定义在这里:
// typeAlg is also copied/used in reflect/type.go.// keep them in sync.type typeAlg struct {// function for hashing objects of this type// (ptr to object, seed) -> hashhash func(unsafe.Pointer, uintptr) uintptr// function for comparing objects of this type// (ptr to object A, ptr to object B) -> ==?equal func(unsafe.Pointer, unsafe.Pointer) bool}
可以知道,入参是指向key的指针,第二个参数是hash种子。
golang里面的对象的hash原理
golang里面每种数据类型的hash是与数据类型强相关的,并且是由编译器负责做类型绑定的。在src/runtime/alg.go 里面定义个一个map[type]typeAlg,用于表示每个数据类型对应的 typeAlg。
var algarray = [alg_max]typeAlg{alg_NOEQ: {nil, nil},alg_MEM0: {memhash0, memequal0},alg_MEM8: {memhash8, memequal8},alg_MEM16: {memhash16, memequal16},alg_MEM32: {memhash32, memequal32},alg_MEM64: {memhash64, memequal64},alg_MEM128: {memhash128, memequal128},alg_STRING: {strhash, strequal},alg_INTER: {interhash, interequal},alg_NILINTER: {nilinterhash, nilinterequal},alg_FLOAT32: {f32hash, f32equal},alg_FLOAT64: {f64hash, f64equal},alg_CPLX64: {c64hash, c64equal},alg_CPLX128: {c128hash, c128equal},}
上面的hash函数,最后实际上底层都会调用:func memhash(p unsafe.Pointer, seed, s uintptr) uintptr 函数。以strhash函数为例:
func strhash(a unsafe.Pointer, h uintptr) uintptr {x := (*stringStruct)(a)return memhash(x.str, h, uintptr(x.len))}
下面看一下memhash函数, src/runtime/hash64.go
// p表示需要hash的对象的地址// seed 是hash 种子,// s是需要hash的对象的字节数func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {if (GOARCH == "amd64" || GOARCH == "arm64") &&GOOS != "nacl" && useAeshash {return aeshash(p, seed, s)}h := uint64(seed + s*hashkey[0])tail:switch {case s == 0:case s < 4:h ^= uint64(*(*byte)(p))h ^= uint64(*(*byte)(add(p, s>>1))) << 8h ^= uint64(*(*byte)(add(p, s-1))) << 16h = rotl_31(h*m1) * m2case s <= 8:h ^= uint64(readUnaligned32(p))h ^= uint64(readUnaligned32(add(p, s-4))) << 32h = rotl_31(h*m1) * m2case s <= 16:h ^= readUnaligned64(p)h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-8))h = rotl_31(h*m1) * m2case s <= 32:h ^= readUnaligned64(p)h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, 8))h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-16))h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-8))h = rotl_31(h*m1) * m2default:v1 := hv2 := uint64(seed * hashkey[1])v3 := uint64(seed * hashkey[2])v4 := uint64(seed * hashkey[3])for s >= 32 {v1 ^= readUnaligned64(p)v1 = rotl_31(v1*m1) * m2p = add(p, 8)v2 ^= readUnaligned64(p)v2 = rotl_31(v2*m2) * m3p = add(p, 8)v3 ^= readUnaligned64(p)v3 = rotl_31(v3*m3) * m4p = add(p, 8)v4 ^= readUnaligned64(p)v4 = rotl_31(v4*m4) * m1p = add(p, 8)s -= 32}h = v1 ^ v2 ^ v3 ^ v4goto tail}h ^= h >> 29h *= m3h ^= h >> 32return uintptr(h)}
这个就是整体的hash实现。 对于amd64会使用汇编实现的aeshash函数计算hash。
具体aeshash实现,有兴趣可以看汇编:src/runtime/asm_amd64.s 里面的 runtime·aeshash 函数。
Hash 测试
下面以string作为key,来测试以下三种算法的性能:
- fnv算法
- memhash算法
- aeshash的汇编算法
fnv
测试背景:10000个uuid string;
const LEN = 10000// the length of element is 36var keys [LEN][]bytefunc init(){for i:=0; i<LEN; i++ {k := uuid.New().String()keys[i] = []byte(k)}}// 777000nsfunc main(){start := time.Now().UnixNano()for _, k := range keys {h := fnv.New64()h.Write(k)h.Sum64()}end := time.Now().UnixNano()fmt.Println("total time:", end-start, "ns")}
执行结果:10000次hash大约是727000ns,也就是平均每次hash要花费72ns。
memhash
这个是调用golang runtime里面的内嵌hash代码,这部分是我从runtime里面copy出来的。
代码如下:
package mainconst LEN = 10000// the length of element is 36var keys [LEN]stringfunc init(){for i:=0; i<LEN; i++ {keys[i] = uuid.New().String()}}// 178000nsfunc main() {start := time.Now().UnixNano()for _, k := range keys {memhash(unsafe.Pointer(&k), 1, 36)}end := time.Now().UnixNano()fmt.Println("total time:", end-start, "ns")}// used in hash{32,64}.go to seed the hash functionvar hashkey [4]uintptrconst PtrSize = 4 << (^uintptr(0) >> 63)func init() {for i:=0; i<4; i++ {hashkey[i]= uintptr(rand.Int63())}hashkey[0] |= 1 // make sure these numbers are oddhashkey[1] |= 1hashkey[2] |= 1hashkey[3] |= 1}func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {return unsafe.Pointer(uintptr(p) + x)}func rotl_31(x uint64) uint64 {return (x << 31) | (x >> (64 - 31))}const (// Constants for multiplication: four random odd 64-bit numbers.m1 = 16877499708836156737m2 = 2820277070424839065m3 = 9497967016996688599m4 = 15839092249703872147)const (BigEndian = falseDefaultPhysPageSize = 4096PCQuantum = 1Int64Align = 8MinFrameSize = 0)func readUnaligned64(p unsafe.Pointer) uint64 {q := (*[8]byte)(p)if BigEndian {return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56}return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56}// Note: These routines perform the read with an native endianness.func readUnaligned32(p unsafe.Pointer) uint32 {q := (*[4]byte)(p)if BigEndian {return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24}return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24}func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {h := uint64(seed + s*hashkey[0])tail:switch {case s == 0:case s < 4:h ^= uint64(*(*byte)(p))h ^= uint64(*(*byte)(add(p, s>>1))) << 8h ^= uint64(*(*byte)(add(p, s-1))) << 16h = rotl_31(h*m1) * m2case s <= 8:h ^= uint64(readUnaligned32(p))h ^= uint64(readUnaligned32(add(p, s-4))) << 32h = rotl_31(h*m1) * m2case s <= 16:h ^= readUnaligned64(p)h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-8))h = rotl_31(h*m1) * m2case s <= 32:h ^= readUnaligned64(p)h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, 8))h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-16))h = rotl_31(h*m1) * m2h ^= readUnaligned64(add(p, s-8))h = rotl_31(h*m1) * m2default:v1 := hv2 := uint64(seed * hashkey[1])v3 := uint64(seed * hashkey[2])v4 := uint64(seed * hashkey[3])for s >= 32 {v1 ^= readUnaligned64(p)v1 = rotl_31(v1*m1) * m2p = add(p, 8)v2 ^= readUnaligned64(p)v2 = rotl_31(v2*m2) * m3p = add(p, 8)v3 ^= readUnaligned64(p)v3 = rotl_31(v3*m3) * m4p = add(p, 8)v4 ^= readUnaligned64(p)v4 = rotl_31(v4*m4) * m1p = add(p, 8)s -= 32}h = v1 ^ v2 ^ v3 ^ v4goto tail}h ^= h >> 29h *= m3h ^= h >> 32return uintptr(h)}
执行结果:10000次hash大约是178000ns,也就是平均每次hash要花费17.8ns。
aeshash汇编
这部分代码也是从runtime asm代码copy出来做测试:
注意,需要把src/runtime/go_tls.h、funcdata.h、textflag.h 这个三个头文件放在当前目录下。
// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.#include "go_asm.h"#include "go_tls.h"#include "funcdata.h"#include "textflag.h"//louyuting// func aeshashstr(p unsafe.Pointer, h uintptr) uintptrTEXT ·aeshashstr2(SB),NOSPLIT,$0-24MOVQ p+0(FP), AX // ptr to string structMOVQ 8(AX), CX // length of stringMOVQ (AX), AX // string dataLEAQ ret+16(FP), DXJMP aeshashbody<>(SB)// AX: data// CX: length// DX: address to put return valueTEXT aeshashbody<>(SB),NOSPLIT,$0-0// Fill an SSE register with our seeds.MOVQ h+8(FP), X0 // 64 bits of per-table hash seedPINSRW $4, CX, X0 // 16 bits of lengthPSHUFHW $0, X0, X0 // repeat length 4 times totalMOVO X0, X1 // save unscrambled seedPXOR main·aeskeysched(SB), X0 // xor in per-process seedAESENC X0, X0 // scramble seedCMPQ CX, $16JB aes0to15JE aes16CMPQ CX, $32JBE aes17to32CMPQ CX, $64JBE aes33to64CMPQ CX, $128JBE aes65to128JMP aes129plusaes0to15:TESTQ CX, CXJE aes0ADDQ $16, AXTESTW $0xff0, AXJE endofpage// 16 bytes loaded at this address won't cross// a page boundary, so we can load it directly.MOVOU -16(AX), X1ADDQ CX, CXMOVQ $masks<>(SB), AXPAND (AX)(CX*8), X1final1:PXOR X0, X1 // xor data with seedAESENC X1, X1 // scramble combo 3 timesAESENC X1, X1AESENC X1, X1MOVQ X1, (DX)RETendofpage:// address ends in 1111xxxx. Might be up against// a page boundary, so load ending at last byte.// Then shift bytes down using pshufb.MOVOU -32(AX)(CX*1), X1ADDQ CX, CXMOVQ $shifts<>(SB), AXPSHUFB (AX)(CX*8), X1JMP final1aes0:// Return scrambled input seedAESENC X0, X0MOVQ X0, (DX)RETaes16:MOVOU (AX), X1JMP final1aes17to32:// make second starting seedPXOR main·aeskeysched+16(SB), X1AESENC X1, X1// load data to be hashedMOVOU (AX), X2MOVOU -16(AX)(CX*1), X3// xor with seedPXOR X0, X2PXOR X1, X3// scramble 3 timesAESENC X2, X2AESENC X3, X3AESENC X2, X2AESENC X3, X3AESENC X2, X2AESENC X3, X3// combine resultsPXOR X3, X2MOVQ X2, (DX)RETaes33to64:// make 3 more starting seedsMOVO X1, X2MOVO X1, X3PXOR main·aeskeysched+16(SB), X1PXOR main·aeskeysched+32(SB), X2PXOR main·aeskeysched+48(SB), X3AESENC X1, X1AESENC X2, X2AESENC X3, X3MOVOU (AX), X4MOVOU 16(AX), X5MOVOU -32(AX)(CX*1), X6MOVOU -16(AX)(CX*1), X7PXOR X0, X4PXOR X1, X5PXOR X2, X6PXOR X3, X7AESENC X4, X4AESENC X5, X5AESENC X6, X6AESENC X7, X7AESENC X4, X4AESENC X5, X5AESENC X6, X6AESENC X7, X7AESENC X4, X4AESENC X5, X5AESENC X6, X6AESENC X7, X7PXOR X6, X4PXOR X7, X5PXOR X5, X4MOVQ X4, (DX)RETaes65to128:// make 7 more starting seedsMOVO X1, X2MOVO X1, X3MOVO X1, X4MOVO X1, X5MOVO X1, X6MOVO X1, X7PXOR main·aeskeysched+16(SB), X1PXOR main·aeskeysched+32(SB), X2PXOR main·aeskeysched+48(SB), X3PXOR main·aeskeysched+64(SB), X4PXOR main·aeskeysched+80(SB), X5PXOR main·aeskeysched+96(SB), X6PXOR main·aeskeysched+112(SB), X7AESENC X1, X1AESENC X2, X2AESENC X3, X3AESENC X4, X4AESENC X5, X5AESENC X6, X6AESENC X7, X7// load dataMOVOU (AX), X8MOVOU 16(AX), X9MOVOU 32(AX), X10MOVOU 48(AX), X11MOVOU -64(AX)(CX*1), X12MOVOU -48(AX)(CX*1), X13MOVOU -32(AX)(CX*1), X14MOVOU -16(AX)(CX*1), X15// xor with seedPXOR X0, X8PXOR X1, X9PXOR X2, X10PXOR X3, X11PXOR X4, X12PXOR X5, X13PXOR X6, X14PXOR X7, X15// scramble 3 timesAESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15AESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15AESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15// combine resultsPXOR X12, X8PXOR X13, X9PXOR X14, X10PXOR X15, X11PXOR X10, X8PXOR X11, X9PXOR X9, X8MOVQ X8, (DX)RETaes129plus:// make 7 more starting seedsMOVO X1, X2MOVO X1, X3MOVO X1, X4MOVO X1, X5MOVO X1, X6MOVO X1, X7PXOR main·aeskeysched+16(SB), X1PXOR main·aeskeysched+32(SB), X2PXOR main·aeskeysched+48(SB), X3PXOR main·aeskeysched+64(SB), X4PXOR main·aeskeysched+80(SB), X5PXOR main·aeskeysched+96(SB), X6PXOR main·aeskeysched+112(SB), X7AESENC X1, X1AESENC X2, X2AESENC X3, X3AESENC X4, X4AESENC X5, X5AESENC X6, X6AESENC X7, X7// start with last (possibly overlapping) blockMOVOU -128(AX)(CX*1), X8MOVOU -112(AX)(CX*1), X9MOVOU -96(AX)(CX*1), X10MOVOU -80(AX)(CX*1), X11MOVOU -64(AX)(CX*1), X12MOVOU -48(AX)(CX*1), X13MOVOU -32(AX)(CX*1), X14MOVOU -16(AX)(CX*1), X15// xor in seedPXOR X0, X8PXOR X1, X9PXOR X2, X10PXOR X3, X11PXOR X4, X12PXOR X5, X13PXOR X6, X14PXOR X7, X15// compute number of remaining 128-byte blocksDECQ CXSHRQ $7, CXaesloop:// scramble stateAESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15// scramble state, xor in a blockMOVOU (AX), X0MOVOU 16(AX), X1MOVOU 32(AX), X2MOVOU 48(AX), X3AESENC X0, X8AESENC X1, X9AESENC X2, X10AESENC X3, X11MOVOU 64(AX), X4MOVOU 80(AX), X5MOVOU 96(AX), X6MOVOU 112(AX), X7AESENC X4, X12AESENC X5, X13AESENC X6, X14AESENC X7, X15ADDQ $128, AXDECQ CXJNE aesloop// 3 more scrambles to finishAESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15AESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15AESENC X8, X8AESENC X9, X9AESENC X10, X10AESENC X11, X11AESENC X12, X12AESENC X13, X13AESENC X14, X14AESENC X15, X15PXOR X12, X8PXOR X13, X9PXOR X14, X10PXOR X15, X11PXOR X10, X8PXOR X11, X9PXOR X9, X8MOVQ X8, (DX)RET// simple mask to get rid of data in the high part of the register.DATA masks<>+0x00(SB)/8, $0x0000000000000000DATA masks<>+0x08(SB)/8, $0x0000000000000000DATA masks<>+0x10(SB)/8, $0x00000000000000ffDATA masks<>+0x18(SB)/8, $0x0000000000000000DATA masks<>+0x20(SB)/8, $0x000000000000ffffDATA masks<>+0x28(SB)/8, $0x0000000000000000DATA masks<>+0x30(SB)/8, $0x0000000000ffffffDATA masks<>+0x38(SB)/8, $0x0000000000000000DATA masks<>+0x40(SB)/8, $0x00000000ffffffffDATA masks<>+0x48(SB)/8, $0x0000000000000000DATA masks<>+0x50(SB)/8, $0x000000ffffffffffDATA masks<>+0x58(SB)/8, $0x0000000000000000DATA masks<>+0x60(SB)/8, $0x0000ffffffffffffDATA masks<>+0x68(SB)/8, $0x0000000000000000DATA masks<>+0x70(SB)/8, $0x00ffffffffffffffDATA masks<>+0x78(SB)/8, $0x0000000000000000DATA masks<>+0x80(SB)/8, $0xffffffffffffffffDATA masks<>+0x88(SB)/8, $0x0000000000000000DATA masks<>+0x90(SB)/8, $0xffffffffffffffffDATA masks<>+0x98(SB)/8, $0x00000000000000ffDATA masks<>+0xa0(SB)/8, $0xffffffffffffffffDATA masks<>+0xa8(SB)/8, $0x000000000000ffffDATA masks<>+0xb0(SB)/8, $0xffffffffffffffffDATA masks<>+0xb8(SB)/8, $0x0000000000ffffffDATA masks<>+0xc0(SB)/8, $0xffffffffffffffffDATA masks<>+0xc8(SB)/8, $0x00000000ffffffffDATA masks<>+0xd0(SB)/8, $0xffffffffffffffffDATA masks<>+0xd8(SB)/8, $0x000000ffffffffffDATA masks<>+0xe0(SB)/8, $0xffffffffffffffffDATA masks<>+0xe8(SB)/8, $0x0000ffffffffffffDATA masks<>+0xf0(SB)/8, $0xffffffffffffffffDATA masks<>+0xf8(SB)/8, $0x00ffffffffffffffGLOBL masks<>(SB),RODATA,$256// func checkASM() boolTEXT ·checkASM(SB),NOSPLIT,$0-1// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byteMOVQ $masks<>(SB), AXMOVQ $shifts<>(SB), BXORQ BX, AXTESTQ $15, AXSETEQ ret+0(FP)RET// these are arguments to pshufb. They move data down from// the high bytes of the register to the low bytes of the register.// index is how many bytes to move.DATA shifts<>+0x00(SB)/8, $0x0000000000000000DATA shifts<>+0x08(SB)/8, $0x0000000000000000DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0fDATA shifts<>+0x18(SB)/8, $0xffffffffffffffffDATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0eDATA shifts<>+0x28(SB)/8, $0xffffffffffffffffDATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0dDATA shifts<>+0x38(SB)/8, $0xffffffffffffffffDATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0cDATA shifts<>+0x48(SB)/8, $0xffffffffffffffffDATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0bDATA shifts<>+0x58(SB)/8, $0xffffffffffffffffDATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0aDATA shifts<>+0x68(SB)/8, $0xffffffffffffffffDATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09DATA shifts<>+0x78(SB)/8, $0xffffffffffffffffDATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908DATA shifts<>+0x88(SB)/8, $0xffffffffffffffffDATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0fDATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0eDATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0dDATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0cDATA shifts<>+0xd0(SB)/8, $0x0a09080706050403DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0bDATA shifts<>+0xe0(SB)/8, $0x0908070605040302DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0aDATA shifts<>+0xf0(SB)/8, $0x0807060504030201DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09GLOBL shifts<>(SB),RODATA,$256
测试结果来看,10000个记录,大概也是182000ns, 每次大概是18.2ns。
总结:
| 算法 | per hash cost |
|---|---|
| fnv | 72ns |
| memhash | 17.8ns |
| aeshash 汇编 | 18.2ns |
从测试来看,memhash的hash和aeshash性能差不多,但是比fnv就要好很多了。
转自:https://blog.csdn.net/u010853261/article/details/104148615
