article - json - 《computer》

标准库 JSON Unmarshal
GJSON
jsonparser
性能对比

在之前时长服务(duration)相关的问题记录里面，提到了 JSON Unmarshal 大对象时的性能问题。

本来以为标准库的JSON Unmarshal 效率会很高，一般对性能的优化也不会考虑到序列化这一块儿。结果当我

主要对比一下主流的几个golang的JSON库(选择了部分库对比研究)

json库名	Star
官方标准 JSON Unmarshal
tidwall/gjson	8.5k
tinylib/msgp	1.4k
buger/jsonparser	4k

标准库 JSON Unmarshal

先来看一个例子

源码分析(go version go1.16.5 )

JSON 解析库

func Unmarshal(data []byte, v interface{}) error

在标准库JSON解析前会去调用reflect.ValueOf来获取参数 v 的反射对象，以此来判断以哪种方式来进行解析

// value decodes the JSON value that the scanner has just started reading,
// dispatching on d.opcode. Arrays and objects are decoded into v when v is
// valid and skipped otherwise; literals are rescanned and stored into v only
// when v is valid. Any other opcode is treated as an internal error and panics.
func (d *decodeState) value(v reflect.Value) error {
    switch d.opcode {
    default:
        panic(phasePanicMsg)
    case scanBeginArray:  // array
        if v.IsValid() {
            if err := d.array(v); err != nil {
                return err
            }
        } else {
            // No destination to decode into: discard the whole array.
            d.skip()
        }
        d.scanNext()
    case scanBeginObject: // struct or map
        if v.IsValid() {
            if err := d.object(v); err != nil {
                return err
            }
        } else {
            // No destination to decode into: discard the whole object.
            d.skip()
        }
        d.scanNext()
    case scanBeginLiteral:  // int, string, float, etc.
        // All bytes inside literal return scanContinue op code.
        start := d.readIndex()
        d.rescanLiteral()
        if v.IsValid() {
            // Store the raw literal bytes d.data[start:readIndex] into v.
            if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil {
                return err
            }
        }
    }
    return nil
}

看看解析Object

func (d *decodeState) object(v reflect.Value) error {
    // Check for unmarshaler.
    u, ut, pv := indirect(v, false)
    // ... 
    // Decoding into nil interface? Switch to non-reflect code.
    if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
        oi := d.objectInterface()
        v.Set(reflect.ValueOf(oi))
        return nil
    }
    var fields structFields
    // Check type of target:
    //   struct or
    //   map[T1]T2 where T1 is string, an integer type,
    //             or an encoding.TextUnmarshaler
    switch v.Kind() {
    case reflect.Map:
        // Map key must either have string kind, have an integer kind,
        // or be an encoding.TextUnmarshaler.
        switch t.Key().Kind() {
            // 如果是map的话还要对key进行分类
        case reflect.String,
            reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
            reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
        default:
            if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) {
                d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)})
                d.skip()
                return nil
            }
        }
        if v.IsNil() {
            v.Set(reflect.MakeMap(t))
        }
    case reflect.Struct
        // 缓存结构体的字段到 fields 对象中
        fields = cachedTypeFields(t)
        // ok
    default:
        d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)})
        d.skip()
        return nil
    }
    var mapElem reflect.Value
    origErrorContext := d.errorContext
    for {
        // Read opening " of string key or closing }.
        d.scanWhile(scanSkipSpace)
        if d.opcode == scanEndObject {
            // closing } - can only happen on first iteration.
            break
        }
        if d.opcode != scanBeginLiteral {
            panic(phasePanicMsg)
        }
        // Read key.
        // 循环解析JSON字符串中的k,v
        start := d.readIndex()
        d.rescanLiteral()
        // 获取k
        item := d.data[start:d.readIndex()]
        key, ok := unquoteBytes(item)
        if !ok {
            panic(phasePanicMsg)
        }
        // Figure out field corresponding to key.
        var subv reflect.Value
        destring := false // whether the value is wrapped in a string to be decoded first
        if v.Kind() == reflect.Map {
            elemType := t.Elem()
            if !mapElem.IsValid() {
                mapElem = reflect.New(elemType).Elem()
            } else {
                mapElem.Set(reflect.Zero(elemType))
            }
            subv = mapElem
        } else {
            var f *field
            if i, ok := fields.nameIndex[string(key)]; ok {
                // Found an exact name match.
                f = &fields.list[i]
            } else {
                // Fall back to the expensive case-insensitive
                // linear search.
                for i := range fields.list {
                    ff := &fields.list[i]
                    if ff.equalFold(ff.nameBytes, key) {
                        f = ff
                        break
                    }
                }
            }
            // ...
        }
        // Read : before value.
        if d.opcode == scanSkipSpace {
            d.scanWhile(scanSkipSpace)
        }
        if d.opcode != scanObjectKey {
            panic(phasePanicMsg)
        }
        d.scanWhile(scanSkipSpace)
        if destring { // 设置v值 
            switch qv := d.valueQuoted().(type) {
            case nil:
                if err := d.literalStore(nullLiteral, subv, false); err != nil {
                    return err
                }
            // ...
            default:
                d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type()))
            }
        } else {
            // 数组或对象会递归调用 value 方法
            if err := d.value(subv); err != nil {
                return err
            }
        }
        // Write value back to map;
        // if using struct, subv points into struct already.
        if v.Kind() == reflect.Map {
            kt := t.Key()
            var kv reflect.Value
            switch {
            case reflect.PtrTo(kt).Implements(textUnmarshalerType):
                kv = reflect.New(kt)
                if err := d.literalStore(item, kv, true); err != nil {
                    return err
                }
                kv = kv.Elem()
            case kt.Kind() == reflect.String:
                kv = reflect.ValueOf(key).Convert(kt)
            default:
            // ...
            }
            if kv.IsValid() {
                v.SetMapIndex(kv, subv)
            }
        }
        // 遇到 } 最后退出循环
        if d.opcode == scanSkipSpace {
            d.scanWhile(scanSkipSpace)
        }
        // Reset errorContext to its original state.
        // Keep the same underlying array for FieldStack, to reuse the
        // space and avoid unnecessary allocs.
        d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)]
        d.errorContext.Struct = origErrorContext.Struct
        if d.opcode == scanEndObject {
            break
        }
        if d.opcode != scanObjectValue {
            panic(phasePanicMsg)
        }
    }
    return nil
}

大概会做这么几件事

缓存结构体
循环遍历 JSON 对象中的键值对
解析出 JSON 中的 key 之后，在结构体中查找同名字段（先精确匹配，找不到再做大小写不敏感的线性查找）
递归调用 value 方法反射设置结构体对应的值
直到遍历到 JSON 中结尾 }结束循环。

Unmarshal 源码中使用了大量的反射来获取字段值，而且还有递归调用来获取反射值，如果是多层嵌套的 JSON 的话，性能会更差。

GJSON

对比官方的JSON库，GJSON 优雅太多了，因为 Golang中简单的数据结构可使用map[string]interface{}，但是嵌套复杂的结构需要预定义struct结构体。用json.Unmarshal把数据解析到结构体中，然后get到值，虽然确实会清晰一些。比如不看文档的情况下，和其他系统做交互的情况下。可以有一个清晰的结构体。但是GJSON的代码量会少很多，这也算是它的优势吧。

源自官方demo

package main
import "github.com/tidwall/gjson"
const json = `{"name":{"first":"Janet","last":"Prichard"},"age":47}`
func main() {
    value := gjson.Get(json, "name.last")
    println(value.String())
}

key 可以包含特殊的通配符 '*' 和 '?'

比如有这么一个json串

{
  "name": {"first": "Tom", "last": "Anderson"},
  "age":37,
  "children": ["Sara","Alex","Jack"],
  "fav.movie": "Deer Hunter",
  "friends": [
    {"first": "Dale", "last": "Murphy", "age": 44, "nets": ["ig", "fb", "tw"]},
    {"first": "Roger", "last": "Craig", "age": 68, "nets": ["fb", "tw"]},
    {"first": "Jane", "last": "Murphy", "age": 47, "nets": ["ig", "tw"]}
  ]
}

json := `{
    "name":{"first":"Tom", "last": "Anderson"},
    "age": 37,
    "children": ["Sara", "Alex", "Jack"]
}`
fmt.Println("third child*:", gjson.Get(json, "child*.2"))
fmt.Println("first c?ild:", gjson.Get(json, "c?ildren.0"))
// child*.2: "child*" first matches the key "children", then ".2" reads its
// third element. This example comes from the third-party library itself.

来看看get源码
func Get 有2个参数，一个是 string 类型的 json，一个是 path（就好像拿一条路径去和一棵树做匹配一样）

// Get evaluates path against the JSON text in json and returns the resulting
// Result.
//
// Paths longer than one character get two special cases first:
//   - a leading '@' is tried as a modifier via execModifier (unless
//     DisableModifiers is set);
//   - a leading '[' or '{' is tried as a subselector list via
//     parseSubSelectors, and a new JSON array/object is assembled from the
//     individual sub-results.
//
// Otherwise the path is handed to parseArray/parseObject, starting at the
// first '[' or '{' found in json (or to parseArray directly when the path
// starts with "..").
func Get(json, path string) Result {
    // Modifiers and subselectors are only considered when path is longer
    // than one character.
    if len(path) > 1 { 
        if !DisableModifiers {
            if path[0] == '@' {
                // possible modifier
                var ok bool
                var npath string
                var rjson string
                npath, rjson, ok = execModifier(json, path)
                if ok {
                    path = npath
                    // If the remaining path starts with '|' or '.', strip that
                    // separator and evaluate the rest against the modifier's output.
                    if len(path) > 0 && (path[0] == '|' || path[0] == '.') {
                        res := Get(rjson, path[1:])
                        res.Index = 0
                        return res
                    }
                    // Nothing left to evaluate: return the modifier's output itself.
                    return Parse(rjson)
                }
            }
        }
        if path[0] == '[' || path[0] == '{' {
            // using a subselector path
            kind := path[0]
            var ok bool
            var subs []subSelector
            subs, path, ok = parseSubSelectors(path)
            if ok {
                if len(path) == 0 || (path[0] == '|' || path[0] == '.') {
                    // b accumulates the raw JSON text of the combined result,
                    // starting with the opening bracket/brace.
                    var b []byte
                    b = append(b, kind)
                    // i counts the sub-results written so far, to place commas.
                    var i int
                    for _, sub := range subs {
                        res := Get(json, sub.path)
                        if res.Exists() {
                            if i > 0 {
                                b = append(b, ',')
                            }
                            if kind == '{' {
                                // Object output needs a member name before each value.
                                if len(sub.name) > 0 {
                                    if sub.name[0] == '"' && Valid(sub.name) {
                                        // Name is already a valid quoted JSON string.
                                        b = append(b, sub.name...)
                                    } else {
                                        b = appendJSONString(b, sub.name)
                                    }
                                } else {
                                    // No explicit name: derive one from the last
                                    // path component, or fall back to "_".
                                    last := nameOfLast(sub.path)
                                    if isSimpleName(last) {
                                        b = appendJSONString(b, last)
                                    } else {
                                        b = appendJSONString(b, "_")
                                    }
                                }
                                b = append(b, ':')
                            }
                            var raw string
                            if len(res.Raw) == 0 {
                                raw = res.String()
                                if len(raw) == 0 {
                                    raw = "null"
                                }
                            } else {
                                raw = res.Raw
                            }
                            b = append(b, raw...)
                            i++
                        }
                    }
                    // In ASCII the closing delimiter is the opening one plus 2:
                    // '[' + 2 == ']' and '{' + 2 == '}'.
                    b = append(b, kind+2)
                    var res Result
                    res.Raw = string(b)
                    res.Type = JSON
                    if len(path) > 0 {
                        // Apply the rest of the path (after the '|' or '.') to
                        // the assembled result.
                        res = res.Get(path[1:])
                    }
                    res.Index = 0
                    return res
                }
            }
        }
    }
    var i int
    var c = &parseContext{json: json}
    if len(path) >= 2 && path[0] == '.' && path[1] == '.' {
        // A leading ".." switches the context into lines mode and parses the
        // input as an array from offset 0 -- presumably newline-delimited
        // JSON; confirm against the gjson documentation.
        c.lines = true
        parseArray(c, 0, path[2:])
    } else {
        // Skip ahead to the first object or array opener and parse from there.
        for ; i < len(c.json); i++ {
            if c.json[i] == '{' {
                i++
                parseObject(c, i, path)
                break
            }
            if c.json[i] == '[' {
                i++
                parseArray(c, i, path)
                break
            }
        }
    }
    if c.piped {
        // The parser recorded a piped remainder: evaluate it against the
        // value found so far.
        res := c.value.Get(c.pipe)
        res.Index = 0
        return res
    }
    fillIndex(json, c)
    return c.value
}

jsonparser

jsonparser 的入参也是传入一个 JSON 的 byte 切片，以及可以通过传入多个 key 值来快速定位到相应的值，并返回。

jsonparser之所以有这么快的速度

它不解析完整的内容，仅仅解析你指定的字段
在字节级别上操作 JSON，提供指向原始数据结构的指针，无内存分配；
没有自动类型转换，默认情况下，全部内容都是字节切片，但是它提供了值类型，所以你可以自己进行转换

使用方式（来自官方demo）

// data is the sample JSON document, held as a byte slice.
data := []byte(`{
  "person": {
    "name": {
      "first": "Leonid",
      "last": "Bugaev",
      "fullName": "Leonid Bugaev"
    },
    "github": {
      "handle": "buger",
      "followers": 109
    },
    "avatars": [
      { "url": "https://avatars1.githubusercontent.com/u/14009?v=3&s=460", "type": "thumbnail" }
    ]
  },
  "company": {
    "name": "Acme"
  }
}`)
// Look up person -> name -> fullName by passing the keys in order.
// The return values are discarded in this snippet.
jsonparser.Get(data, "person", "name", "fullName")

它只需要遍历一次 JSON字符串即可实现获取多个值的操作。

如果只是单纯的使用get

// Get forwards data and keys to internalGet and returns four of its five
// results, dropping the third one.
func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
    value, dataType, _, offset, err = internalGet(data, keys...)
    return value, dataType, offset, err
}

第一个返回值 value 是一个 []byte 类型，实际取值需要手动将其转换为对应的类型。此外还支持根据索引去获取值，甚至是遍历 key。

性能对比

解析190字节的字符串

库名	time/op	bytes/op	allocs/op
encoding/json struct	3314 ns/op	744 B/op	15 allocs/op
encoding/json interface{}	3075 ns/op	1377 B/op	32 allocs/op
buger/jsonparser	671.4 ns/op	0	0
buger/jsonparser (EachKey API)	539.9 ns/op	0	0
pquerna/ffjson	3758 ns/op	620 B/op	15 allocs/op
msgp	291.9 ns/op	208 B/op	5 allocs/op

解析 2.4kb 左右的字符串

库名	time/op	bytes/op	allocs/op
encoding/json struct	26198 ns/op	1064 B/op	206 allocs/op
encoding/json interface{}	26674 ns/op	10380 B/op	225 allocs/op
buger/jsonparser	6335 ns/op	0	0
buger/jsonparser (EachKey API)	4616 ns/op	112 B/op	2 allocs/op
pquerna/ffjson	20298 ns/op	856 B/op	20 allocs/op
msgp	202.3 ns/op	112 B/op	4 allocs/op

解析24k左右的字符串

库名	time/op	bytes/op	allocs/op
encoding/json struct	334067 ns/op	6101 B/op	211 allocs/op
encoding/json interface{}	412735 ns/op	210656 B/op	2875 allocs/op
buger/jsonparser	46780 ns/op	0	0
pquerna/ffjson	137213 ns/op	5813 B/op	206 allocs/op
msgp	209.8 ns/op	128 B/op	4 allocs/op

总结

使用反射的性能基本上是比较低的，比如JSON官方库，就使用了大量的反射。其他开源的序列化的库通过遍历字节来解析，使得性能高了很多。
有的库根本不需要定义结构体来映射，比如GJSON，可以直接根据JSON字符串来取得值，还支持模糊查找。