iOS底层原理--008：消息快速查找

1. CacheLookup
2. LLookupStart
3. 查找imp
4. 流程图
5. 真机运行汇编
- 5.1 命中断点
- 5.2 汇编结合动态调试
总结

objc_msgSend流程：

当receiver存在，获取消息接收者的isa
使用isa & ISA_MASK，得到类对象
成功得到类对象，进入CacheLookup缓存查找流程，也就是所谓的sel-imp快速查找流程

1. `CacheLookup`

.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant
    mov    x15, x16            // stash the original isa

入参：

◦ Mode：NORMAL
◦ Function：_objc_msgSend
◦ MissLabelDynamic：__objc_msgSend_uncached
◦ MissLabelConstant：参数缺失

mov x15, x16：将x16寄存器的值，赋值x15寄存器

◦ x15寄存器：存储类对象

2. `LLookupStart`

LLookupStart\Function:
    // p1 = SEL, p16 = isa
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    ldr    p10, [x16, #CACHE]                // p10 = mask|buckets
    lsr    p11, p10, #48            // p11 = mask
    and    p10, p10, #0xffffffffffff    // p10 = buckets
    and    w12, w1, w11            // x12 = _cmd & mask
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    ldr    p11, [x16, #CACHE]            // p11 = mask|buckets
    #if CONFIG_USE_PREOPT_CACHES
        #if __has_feature(ptrauth_calls)
            tbnz    p11, #0, LLookupPreopt\Function
            and    p10, p11, #0x0000ffffffffffff    // p10 = buckets
        #else
            and    p10, p11, #0x0000fffffffffffe    // p10 = buckets
            tbnz    p11, #0, LLookupPreopt\Function
        #endif
        eor    p12, p1, p1, LSR #7
        and    p12, p12, p11, LSR #48        // x12 = (_cmd ^ (_cmd >> 7)) & mask
    #else
        and    p10, p11, #0x0000ffffffffffff    // p10 = buckets
        and    p12, p1, p11, LSR #48        // x12 = _cmd & mask
    #endif // CONFIG_USE_PREOPT_CACHES
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    ldr    p11, [x16, #CACHE]                // p11 = mask|buckets
    and    p10, p11, #~0xf            // p10 = buckets
    and    p11, p11, #0xf            // p11 = maskShift
    mov    p12, #0xffff
    lsr    p11, p12, p11            // p11 = mask = 0xffff >> p11
    and    p12, p1, p11            // x12 = _cmd & mask
#else
    #error Unsupported cache mask storage for ARM64.
#endif

真机arm64架构，执行CACHE_MASK_STORAGE_HIGH_16的代码分支
ldr p11, [x16, #CACHE]：

◦ CACHE定义：#define CACHE (2 * __SIZEOF_POINTER__)
◦ 真机arm64架构，__SIZEOF_POINTER__值为8
◦ #CACHE：值为16
◦ 将类对象首地址内存平移16字节，读取该地址中存储的值，赋值p11寄存器
◦ p11寄存器：存储cache首地址处的值，即：_bucketsAndMaybeMask

项目中CONFIG_USE_PREOPT_CACHES定义为1，进入if分支
查看处理器Apple A12以下版本设备的分支，进入else流程
and p10, p11, #0x0000fffffffffffe：

◦ #0x0000fffffffffffe：bucketsMask，在低48位中，1-47位为1，0位为0
◦ 将_bucketsAndMaybeMask & bucketsMask的结果，赋值p10寄存器
◦ p10寄存器：存储buckets首地址

tbnz p11, #0, LLookupPreopt\Function：

◦ tbnz：测试位不为0，则跳转
◦ p11的0号位不为0，进入LLookupPreopt流程
◦ 否则，继续下面的代码流程

eor p12, p1, p1, LSR #7：

◦ p1寄存器：SEL方法编号，即：_cmd
◦ 将_cmd ^ (_cmd >> 7)的结果，赋值p12寄存器

and p12, p12, p11, LSR #48：

◦ _bucketsAndMaybeMask >> 48得到mask
◦ 将p12 & mask的结果，赋值p12寄存器
◦ p12寄存器：存储sel下标

2.1 `_bucketsAndMaybeMask`的`0`号位

在objc源码中

    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
    // _maybeMask is unused, the mask is stored in the top 16 bits.
    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;
    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;
    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

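_bucketsAndMaybeMask中，高16位存储maybeMask，低48位存储buckets。其中紧邻mask下方预留的4位（maskZeroBits）必须为0，msgSend利用这4位，只需一条指令（_bucketsAndMaybeMask >> 44）即可得到mask << 4；而是否存在PreoptCache，由_bucketsAndMaybeMask的0号位判断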

在汇编代码中

tbnz    p11, #0, LLookupPreopt\Function

判断p11的0号位不为0，相当于存在PreoptCache，进入LLookupPreopt流程，查找共享缓存

2.2 `LLookupPreopt`

LLookupPreopt\Function:
#if __has_feature(ptrauth_calls)
    and    p10, p11, #0x007ffffffffffffe    // p10 = buckets
    autdb    x10, x16            // auth as early as possible
#endif
    // x12 = (_cmd - first_shared_cache_sel)
    adrp    x9, _MagicSelRef@PAGE
    ldr    p9, [x9, _MagicSelRef@PAGEOFF]
    sub    p12, p1, p9
    // w9  = ((_cmd - first_shared_cache_sel) >> hash_shift & hash_mask)
#if __has_feature(ptrauth_calls)
    // bits 63..60 of x11 are the number of bits in hash_mask
    // bits 59..55 of x11 is hash_shift
    lsr    x17, x11, #55            // w17 = (hash_shift, ...)
    lsr    w9, w12, w17            // >>= shift
    lsr    x17, x11, #60            // w17 = mask_bits
    mov    x11, #0x7fff
    lsr    x11, x11, x17            // p11 = mask (0x7fff >> mask_bits)
    and    x9, x9, x11            // &= mask
#else
    // bits 63..53 of x11 is hash_mask
    // bits 52..48 of x11 is hash_shift
    lsr    x17, x11, #48            // w17 = (hash_shift, hash_mask)
    lsr    w9, w12, w17            // >>= shift
    and    x9, x9, x11, LSR #53        // &=  mask
#endif
    ldr    x17, [x10, x9, LSL #3]        // x17 == sel_offs | (imp_offs << 32)
    cmp    x12, w17, uxtw
.if \Mode == GETIMP
    b.ne    \MissLabelConstant        // cache miss
    sub    x0, x16, x17, LSR #32        // imp = isa - imp_offs
    SignAsImp x0
    ret
.else
    b.ne    5f                // cache miss
    sub    x17, x16, x17, LSR #32        // imp = isa - imp_offs
.if \Mode == NORMAL
    br    x17
.elseif \Mode == LOOKUP
    orr x16, x16, #3 // for instrumentation, note that we hit a constant cache
    SignAsImp x17
    ret
.else
.abort  unhandled mode \Mode
.endif

跳过Apple A12或更高版本处理器设备的代码逻辑
adrp x9, _MagicSelRef@PAGE~ldr p9, [x9, _MagicSelRef@PAGEOFF]：adrp得到_MagicSelRef所在内存页的基地址，ldr再加上页内偏移读取其中的值，得到共享缓存中的第一个sel（first_shared_cache_sel），赋值p9寄存器
sub p12, p1, p9：将_cmd - first_shared_cache_sel的结果，赋值p12寄存器
查看处理器Apple A12以下版本设备的分支，进入else流程
lsr x17, x11, #48：将_bucketsAndMaybeMask >> 48的结果，赋值x17寄存器
lsr w9, w12, w17：将w12 >> w17的结果，赋值w9寄存器
and x9, x9, x11, LSR #53：将x9 & (_bucketsAndMaybeMask >> 53)的结果，赋值x9寄存器
ldr x17, [x10, x9, LSL #3]：将x10 + (x9 << 3)内存中的值，赋值x17寄存器
cmp x12, w17, uxtw：uxtw表示将w17（x17的低32位）无符号扩展为64位。由于x17 == sel_offs | (imp_offs << 32)，这里相当于用x12（_cmd - first_shared_cache_sel）和sel_offs比较
Mode不等于GETIMP，进入else流程
b.ne 5f：cache miss
sub x17, x16, x17, LSR #32：imp = isa - imp_offs，赋值x17寄存器
查看Mode等于NORMAL的代码流程
br x17：跳转到指定imp函数地址

2.3 对照源码中的`cache_hash`函数

eor    p12, p1, p1, LSR #7
and    p12, p12, p11, LSR #48        // x12 = (_cmd ^ (_cmd >> 7)) & mask
-------------------------
static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

存储和读取使用相同的哈希算法，计算出sel的下标

3. 查找`imp`

3.1 获取当前下标的`bucket`

add    p13, p10, p12, LSL #(1+PTRSHIFT)
                    // p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

PTRSHIFT：真机arm64架构，PTRSHIFT定义为3
add p13, p10, p12, LSL #(1+PTRSHIFT)：

◦ p12存储的下标为0,1,2,3..，不能直接用作内存平移
◦ bucket_t中存储imp和sel，占16字节。p12 << 4等同于i * 16，以bucket_t的长度作为内存平移单位
◦ buckets + (i << 4)：将buckets进行内存平移，得到指定下标的bucket，赋值p13寄存器

3.2 流程1

                        // do {
1:    ldp    p17, p9, [x13], #-BUCKET_SIZE    //     {imp, sel} = *bucket--
    cmp    p9, p1                //     if (sel != _cmd) {
    b.ne    3f                //         scan more
                        //     } else {

ldp p17, p9, [x13], #-BUCKET_SIZE：

◦ BUCKET_SIZE定义：#define BUCKET_SIZE (2 * __SIZEOF_POINTER__)
◦ #-BUCKET_SIZE：值为-16
◦ 对x13寄存器取地址，得到bucket
◦ 将bucket中的imp和sel，分别赋值给p17和p9寄存器
◦ 执行x13 += (-16)的操作，赋值x13寄存器
◦ x13寄存器：上一个bucket地址

cmp p9, p1：当前bucket中的sel和传入的_cmd比较
b.ne 3f：不等，进入流程3
否则，继续执行流程2

3.3 `流程2`

2:    CacheHit \Mode                // hit:    call or return imp
                        //     }

进入CacheHit流程，缓存命中流程

3.3.1 `CacheHit`

.macro CacheHit
.if $0 == NORMAL
    TailCallCachedImp x17, x10, x1, x16    // authenticate and call imp
.elseif $0 == GETIMP
    mov    p0, p17
    cbz    p0, 9f            // don't ptrauth a nil imp
    AuthAndResignAsIMP x0, x10, x1, x16    // authenticate imp and re-sign as IMP

CacheHit：缓存命中流程
查看Mode等于NORMAL的代码流程
进入TailCallCachedImp流程

3.3.2 `TailCallCachedImp`

.macro TailCallCachedImp
    // $0 = cached imp, $1 = address of cached imp, $2 = SEL, $3 = isa
    eor    $0, $0, $3
    br    $0
.endmacro

eor $0, $0, $3：按位异或，imp = imp ^ cls，相当于解码
br $0：跳转到指定imp函数地址

3.4 `流程3`

3:    cbz    p9, \MissLabelDynamic        //     if (sel == 0) goto Miss;
    cmp    p13, p10            // } while (bucket >= buckets)
    b.hs    1b

cbz p9, \MissLabelDynamic：

◦ p9寄存器：存储sel
◦ 如果sel为空，说明当前bucket未关联imp和sel，进入MissLabelDynamic流程
◦ MissLabelDynamic：即__objc_msgSend_uncached流程

cmp p13, p10：上一个bucket地址和buckets首地址比较
b.hs 1b：如果>=首地址，进入流程1
否则，<首地址，继续执行代码

3.5 小于`buckets`首地址

#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    add    p13, p10, w11, UXTW #(1+PTRSHIFT)
                        // p13 = buckets + (mask << 1+PTRSHIFT)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    add    p13, p10, p11, LSR #(48 - (1+PTRSHIFT))
                        // p13 = buckets + (mask << 1+PTRSHIFT)
                        // see comment about maskZeroBits
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    add    p13, p10, p11, LSL #(1+PTRSHIFT)
                        // p13 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif
    add    p12, p10, p12, LSL #(1+PTRSHIFT)
                        // p12 = first probed bucket

真机arm64架构，执行CACHE_MASK_STORAGE_HIGH_16的代码分支
add p13, p10, p11, LSR #(48 - (1+PTRSHIFT))：

◦ PTRSHIFT定义：#define PTRSHIFT 3
◦ p10寄存器：存储buckets首地址
◦ p11寄存器：存储cache首地址，即：_bucketsAndMaybeMask
◦ 由于_bucketsAndMaybeMask >> 48为mask，所以_bucketsAndMaybeMask >> 44等同于mask << 4
◦ buckets + (mask << 4)：将buckets进行内存平移，得到mask下标的bucket，赋值p13寄存器

add p12, p10, p12, LSL #(1+PTRSHIFT)：

◦ p12寄存器：存储sel下标
◦ buckets + (i << 4)：将buckets进行内存平移，得到指定下标的bucket，赋值p12寄存器

继续执行流程4

3.6 流程4

                        // do {
4:    ldp    p17, p9, [x13], #-BUCKET_SIZE    //     {imp, sel} = *bucket--
    cmp    p9, p1                //     if (sel == _cmd)
    b.eq    2b                //         goto hit
    cmp    p9, #0                // } while (sel != 0 &&
    ccmp    p13, p12, #0, ne        //     bucket > first_probed)
    b.hi    4b

ldp p17, p9, [x13], #-BUCKET_SIZE：

◦ 将mask下标的bucket中imp和sel，分别赋值给p17和p9寄存器
◦ 执行x13 += (-16)的操作，赋值x13寄存器
◦ x13寄存器：上一个bucket地址

cmp p9, p1：当前bucket中的sel和传入的_cmd比较
b.eq 2b：相等，进入流程2
否则，继续执行代码
cmp p9, #0：sel和#0比较
ccmp p13, p12, #0, ne：条件比较。如果上一条cmp的结果为ne（sel != 0），则上一个bucket地址和指定下标bucket地址比较；否则直接将条件标志位设置为#0，使后面的b.hi条件不成立
b.hi 4b：如果sel存在，并且上一个bucket地址>指定下标bucket地址，进入流程4
否则，继续执行LLookupEnd流程

3.6.1 `LLookupEnd`

LLookupEnd\Function:
LLookupRecover\Function:
    b    \MissLabelDynamic

b \MissLabelDynamic：

◦ 进入MissLabelDynamic流程
◦ MissLabelDynamic：即__objc_msgSend_uncached流程

4. 流程图

5. 真机运行汇编

真机运行项目，汇编结合动态调试，可以更高效的进行代码解读

5.1 命中断点

搭建App工程

创建LGPerson类，在main函数中，调用LGPerson实例方法

设置断点，运行项目，命中objc_msgSend断点

使用lldb，打印x0、x1寄存器，确定objc_msgSend断点由LGPerson实例方法触发

(lldb) register read x1
      x1 = 0x00000001040e264c  "sayNB"
(lldb) register read x0
      x0 = 0x00000002832453d0
(lldb) po 0x00000002832453d0
<LGPerson: 0x2832453d0>

5.2 汇编结合动态调试

汇编代码

消息接收者（x0）和0比较
如果<=0，进入0x19f98e668函数地址
读取实例对象首地址中存储的值，得到isa，赋值x13
x13 & ISA_MASK，得到类对象地址，赋值x16
x16的值，赋值x15
类对象首地址 + #0x10，得到cache，赋值x11
x11 & bucketsMask，得到buckets，赋值x10
判断_bucketsAndMaybeMask的0号位不为0，进入0x19f98e630函数地址，查找共享缓存
否则，继续执行代码
_cmd ^ (_cmd >> 7)，赋值x12
_bucketsAndMaybeMask >> 48，获取maybeMask
x12 & maybeMask，获取_cmd的下标
buckets + (i << 4)，内存平移，得到指定bucket，赋值x13
x13取地址，将imp和sel，分别赋值x17和x9
x13 += #-0x10，得到上一个bucket地址
…

使用lldb，打印x17寄存器，查看imp

(lldb)  register read x17
     x17 = 0x000000009b9473c8
(lldb) register read x16
     x16 = 0x00000001040e9548  (void *)0x00000001040e9520: LGPerson
(lldb) po (IMP)(0x000000009b9473c8 ^ 0x00000001040e9548)
(libobjc.A.dylib`-[NSObject debugDescription])

使用imp ^ cls解码，然后输出imp

打印x9寄存器，查看sel

(lldb) register read x9
      x9 = 0x00000001d31c8c18  
(lldb) po (SEL)0x00000001d31c8c18
"debugDescription"

打印函数地址的汇编代码，例如：打印消息接收者<=0（nil或TaggedPointer）时跳转的0x19f98e668地址的汇编代码

(lldb) dis -s 0x19f98e668
libobjc.A.dylib`objc_msgSend:
    0x19f98e668 <+168>: b.eq   0x19f98e68c               ; <+204>
    0x19f98e66c <+172>: and    x10, x0, #0x7
    0x19f98e670 <+176>: asr    x11, x0, #55
    0x19f98e674 <+180>: cmp    x10, #0x7                 ; =0x7 
    0x19f98e678 <+184>: csel   x12, x11, x10, eq
    0x19f98e67c <+188>: adrp   x10, 267833
    0x19f98e680 <+192>: add    x10, x10, #0xb20          ; =0xb20 
    0x19f98e684 <+196>: ldr    x16, [x10, x12, lsl #3]

总结

快速查找流程为什么使用汇编实现？

快速查找流程，即：方法缓存查找，目的就是提升效率。使用汇编实现，因为汇编代码最接近机器语言，可以最大程度优化存储空间与执行时间
汇编代码对于动态参数、可变参数有更好的支持

快速查找流程：

将类对象地址，内存平移16字节，取地址，得到cache首地址，即：_bucketsAndMaybeMask
_bucketsAndMaybeMask & bucketsMask，获取buckets首地址
判断_bucketsAndMaybeMask的0号位不为0，进入LLookupPreopt流程，查找共享缓存
否则，通过_bucketsAndMaybeMask >> 48，得到mask
(_cmd ^ (_cmd >> 7)) & mask，得到下标i。源码中的cache_hash函数
通过i * 16得到偏移值，buckets首地址+偏移值，得到指定下标的bucket
流程1：

◦ 读取bucket_t中的imp和sel
◦ 通过bucket_t - 16字节，读取上一个bucket

流程2：

◦ 如果sel存在，并且等于_cmd，进入CacheHit缓存命中流程
◦ CacheHit流程：使用imp = imp ^ cls解码
◦ 跳转到指定imp函数地址

流程3：

◦ 如果sel不存在，进入__objc_msgSend_uncached流程
◦ 上一个bucket地址和buckets首地址比较，如果>=，进入流程1
◦ 否则，<首地址，获取mask下标的bucket，进入流程4

流程4：

◦ 读取bucket_t中的imp和sel
◦ 通过bucket_t - 16字节，读取上一个bucket
◦ 如果sel等于_cmd，进入流程2
◦ 如果sel存在，并且上一个bucket地址>指定下标bucket地址，进入流程4
◦ 否则，进入__objc_msgSend_uncached流程

1. CacheLookup

2. LLookupStart

2.1 _bucketsAndMaybeMask的0号位

2.2 LLookupPreopt

2.3 对照源码中的cache_hash函数

3. 查找imp

3.1 获取当前下标的bucket