0%

iOS底层探索 - objc_msgSend(上)

上一篇探究了cache_t的结构和insert流程,但insert是在什么时机、由谁调用的还不清楚,本篇就从insert的调用开始探索。在探索之前,我们先看一下runtime的概念。

runtime

runtime翻译过来就是运行时,与其对应的一个词是编译时,它们的区别是:

  • 编译时 顾名思义就是正在编译的时候 . 那啥叫编译呢?就是编译器帮你把源代码翻译成机器能识别的代码,(当然只是一般意义上这么说,实际上可能只是翻译成某个中间状态的语言)

    编译时就是简单的作一些翻译工作,词法分析,语法分析之类的过程。如果发现错误编译器就告诉你,这时的错误就叫编译时错误。这个过程中做的类型检查也就叫编译时类型检查或静态类型检查。(所谓静态就是没有真把代码放内存中运行起来,而只是把代码当作文本来扫描)。

  • 运行时:代码跑起来了,被装载到内存中了(代码保存在磁盘上、没装入内存之前是个死代码,只有跑到内存中才变成活的)。运行时类型检查与编译时类型检查(静态类型检查)不一样,不是简单地扫描代码,而是在内存中做一些操作以及判断。比如:

    1
    2
    NSObject *obj = [[NSObject alloc] init];
    [obj performSelector:@selector(saySomething)];

insert的调用

前面文章我们知道在调用一个方法的时候会添加缓存,所以我们实例化一个JSPerson实例,调用saySomething方法,查看调用栈的信息:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//  JSPerson.h
#import <Foundation/Foundation.h>

/// Minimal class used to trigger a method call whose cache insert can be traced.
@interface JSPerson : NSObject
/// Logs the name of the executing method.
- (void)saySomething;
@end

// JSPerson.m
#import "JSPerson.h"

@implementation JSPerson

- (void)saySomething {
    // __func__ is the name of the enclosing method, printed as a C string.
    NSLog(@"%s", __func__);
}

// The original listing never closed @implementation, so it did not compile
// and main() ended up inside the implementation.
@end

// main.m
#import "JSPerson.h"

int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Only alloc is sent (no init); saySomething is the call being traced.
        JSPerson *person = [JSPerson alloc];
        [person saySomething];
    }
    return 0;
}

在调试器中查看调用栈:

WeChat9862144aabb8df7479fd49bc79b7f67f

可以清楚地看到,insert方法之前的调用栈从main执行到_objc_msgSend_uncached,最后执行到insert。也就是说,前面没有找到缓存时才会执行到这里。那么查找缓存的流程是什么样的呢?我们都知道OC里调用方法的本质是消息发送,通过clang命令将main.m转换成main.cpp文件也能验证这一点:

clang -rewrite-objc main.m

1
2
3
4
5
6
7
8
9
// main() as emitted by `clang -rewrite-objc main.m`: each Objective-C message
// send becomes a call through objc_msgSend cast to a matching function-pointer type.
int main(int argc, const char * argv[]) {
/* @autoreleasepool */ { __AtAutoreleasePool __autoreleasepool;

// [JSPerson alloc]: receiver is the class returned by objc_getClass("JSPerson"),
// selector is the one registered for "alloc".
JSPerson *person = ((JSPerson *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("JSPerson"), sel_registerName("alloc"));
// [person saySomething]: receiver is person, selector is the one registered for "saySomething".
((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("saySomething"));

}
return 0;
}

可以看到我们的代码其实就是向person对象发送了一个sel_registerName("saySomething")的消息,在到达_objc_msgSend_uncached之前做了缓存的查找。所以我们从objc_msgSend开始探索缓存查找的过程。

快速查找方法(缓存)

在objc源码中搜索objc_msgSend,在objc-msg-arm64.s文件中找到了它的汇编实现,即从ENTRY _objc_msgSend到END_ENTRY _objc_msgSend之间的代码:

ENTRY _objc_msgSend

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
	ENTRY _objc_msgSend
// Entry point of objc_msgSend on arm64: check the receiver for nil / tagged pointer,
// load the class from the receiver's isa, then probe the class's method cache.
UNWIND _objc_msgSend, NoFrame
// p0 is the first argument, the message receiver; compare it with 0.
cmp p0, #0 // nil check and tagged pointer check
// Tagged-pointer-aware path.
#if SUPPORT_TAGGED_POINTERS
b.le LNilOrTagged // (MSB tagged pointer looks negative)
#else
// Without tagged pointers, a zero receiver goes straight to the return-zero path.
b.eq LReturnZero
#endif
// Load the isa from the address held in x0 into p13.
// The class reached through the isa is what CacheLookup reads the cache from ([x16, #CACHE]).
ldr p13, [x0] // p13 = isa
// Derive the class from the isa; on the 64-bit packed-isa path this is p16 = isa & ISA_MASK.
GetClassFromIsa_p16 p13, 1, x0 // p16 = class
LGetIsaDone:// reached once the class is in p16
// calls imp or objc_msgSend_uncached
// Probe the method cache (the fast sel -> imp lookup). On a hit the imp is called;
// on a miss control goes to __objc_msgSend_uncached.
CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
#if SUPPORT_TAGGED_POINTERS //
LNilOrTagged:
b.eq LReturnZero // nil check
// GetTaggedClass is not shown in this excerpt; presumably it leaves the tagged
// pointer's class in p16 before rejoining at LGetIsaDone -- TODO confirm.
GetTaggedClass
b LGetIsaDone
// SUPPORT_TAGGED_POINTERS
#endif
LReturnZero:
// x0 is already zero
// Also clear x1 and d0-d3 before returning.
mov x1, #0
movi d0, #0
movi d1, #0
movi d2, #0
movi d3, #0
ret
END_ENTRY _objc_msgSend

GetClassFromIsa_p16:通过isa获取class

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
.macro GetClassFromIsa_p16 src, needs_auth, auth_address /* note: auth_address is not required if !needs_auth */
// Puts the class for the isa value in \src into p16. One of three variants is assembled.
// Original note: SUPPORT_INDEXED_ISA corresponds to
// __ARM_ARCH_7K__ >= 2 || (__arm64__ && !__LP64__) -- TODO confirm against the config header.
#if SUPPORT_INDEXED_ISA
// Indexed isa
// Copy the isa into p16.
mov p16, \src // optimistically set dst = src
// If the non-pointer-isa bit is clear, p16 already holds the class: skip to label 1.
tbz p16, #ISA_INDEX_IS_NPI_BIT, 1f // done if not non-pointer isa
// isa in p16 is indexed
// Load the page base address of _objc_indexed_classes into x10.
adrp x10, _objc_indexed_classes@PAGE
// Add the in-page offset, so x10 = address of _objc_indexed_classes.
add x10, x10, _objc_indexed_classes@PAGEOFF
// Extract ISA_INDEX_BITS bits starting at bit ISA_INDEX_SHIFT into p16; upper bits become zero.
ubfx p16, p16, #ISA_INDEX_SHIFT, #ISA_INDEX_BITS // extract index
// Use the index (scaled by PTRSHIFT) to load the class pointer from the table.
ldr p16, [x10, p16, UXTP #PTRSHIFT] // load class from array
1:
// 64-bit
#elif __LP64__
.if \needs_auth == 0 // _cache_getImp takes an authed class already
mov p16, \src
.else
// 64-bit packed isa
// p16 = class = isa & ISA_MASK (done by ExtractISA)
ExtractISA p16, \src, \auth_address
.endif
#else
// 32-bit raw isa
mov p16, \src
#endif
.endmacro
.macro ExtractISA
// $0 = $1 & ISA_MASK: mask the isa value down to the class pointer.
// Only the first two arguments are used by this variant.
and $0, $1, #ISA_MASK
.endmacro

CacheLookup 缓存查找汇编源码

缓存查找执行的是CacheLookup:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant
// Looks up the selector in p1 in the method cache of the class in x16.
// On a hit it expands CacheHit \Mode; on a miss it branches to \MissLabelDynamic.
// \MissLabelConstant is not referenced in the portion shown here.
//
// Restart protocol:
//
// As soon as we're past the LLookupStart\Function label we may have
// loaded an invalid cache pointer or mask.
//
// When task_restartable_ranges_synchronize() is called,
// (or when a signal hits us) before we're past LLookupEnd\Function,
// then our PC will be reset to LLookupRecover\Function which forcefully
// jumps to the cache-miss codepath which have the following
// requirements:
//
// GETIMP:
// The cache-miss is just returning NULL (setting x0 to 0)
//
// NORMAL and LOOKUP:
// - x0 contains the receiver
// - x1 contains the selector
// - x16 contains the isa
// - other registers are set as per calling conventions
//

mov x15, x16 // stash the original isa
LLookupStart\Function:
// p1 = SEL, p16 = isa
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
ldr p10, [x16, #CACHE] // p10 = mask|buckets
lsr p11, p10, #48 // p11 = mask
and p10, p10, #0xffffffffffff // p10 = buckets
and w12, w1, w11 // x12 = _cmd & mask
// Next branch: mask stored in the high 16 bits (original note: 64-bit devices).
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// Load the word at class + CACHE into p11.
// Original note: CACHE = 2 * __SIZEOF_POINTER__, i.e. 16 bytes (0x10) past the start
// of the class on 64-bit; CACHE is defined outside this excerpt -- TODO confirm.
ldr p11, [x16, #CACHE] // p11 = mask|buckets
#if CONFIG_USE_PREOPT_CACHES
// Builds with pointer authentication (ptrauth_calls).
#if __has_feature(ptrauth_calls)
// If bit 0 of p11 is set, branch to LLookupPreopt\Function (not part of this excerpt);
// otherwise continue with the regular lookup.
tbnz p11, #0, LLookupPreopt\Function
// p10 = low 48 bits of p11 = buckets
and p10, p11, #0x0000ffffffffffff // p10 = buckets
#else
and p10, p11, #0x0000fffffffffffe // p10 = buckets
tbnz p11, #0, LLookupPreopt\Function
#endif
// p12 = _cmd ^ (_cmd >> 7)
eor p12, p1, p1, LSR #7
// p12 &= mask (the top 16 bits of p11): this is the starting bucket index.
and p12, p12, p11, LSR #48 // x12 = (_cmd ^ (_cmd >> 7)) & mask
#else
and p10, p11, #0x0000ffffffffffff // p10 = buckets
and p12, p1, p11, LSR #48 // x12 = _cmd & mask
#endif // CONFIG_USE_PREOPT_CACHES
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
ldr p11, [x16, #CACHE] // p11 = mask|buckets
and p10, p11, #~0xf // p10 = buckets
and p11, p11, #0xf // p11 = maskShift
mov p12, #0xffff
lsr p11, p12, p11 // p11 = mask = 0xffff >> p11
and p12, p1, p11 // x12 = _cmd & mask
#else
#error Unsupported cache mask storage for ARM64.
#endif
// p13 = address of buckets[index]; the index is scaled by 1 << (1+PTRSHIFT)
// (original note: PTRSHIFT = 3, which would make each bucket 16 bytes).
add p13, p10, p12, LSL #(1+PTRSHIFT)
// p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

// do {
// Load {imp, sel} from the bucket at x13 into p17 / p9, then step x13 back by one bucket.
1: ldp p17, p9, [x13], #-BUCKET_SIZE // {imp, sel} = *bucket--
// Compare the bucket's sel with the incoming _cmd.
cmp p9, p1 // if (sel != _cmd) {
// Not equal: continue at label 3.
b.ne 3f // scan more
// } else {
// Cache hit.
2: CacheHit \Mode // hit: call or return imp
// }
// An empty slot (sel == 0) ends the search: take the miss path.
3: cbz p9, \MissLabelDynamic // if (sel == 0) goto Miss;
// Keep scanning downward while the bucket pointer is still >= the first bucket (unsigned compare).
cmp p13, p10 // } while (bucket >= buckets)
b.hs 1b

// wrap-around:
// p10 = first bucket
// p11 = mask (and maybe other bits on LP64)
// p12 = _cmd & mask
//
// A full cache can happen with CACHE_ALLOW_FULL_UTILIZATION.
// So stop when we circle back to the first probed bucket
// rather than when hitting the first bucket again.
//
// Note that we might probe the initial bucket twice
// when the first probed slot is the last entry.


#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
add p13, p10, w11, UXTW #(1+PTRSHIFT)
// p13 = buckets + (mask << 1+PTRSHIFT)
// Next branch: mask stored in the high 16 bits (original note: 64-bit devices).
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// Here p11 still holds mask|buckets, so one right shift both extracts the mask and scales it.
add p13, p10, p11, LSR #(48 - (1+PTRSHIFT))
// p13 = buckets + (mask << 1+PTRSHIFT)
// see comment about maskZeroBits
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
add p13, p10, p11, LSL #(1+PTRSHIFT)
// p13 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif
add p12, p10, p12, LSL #(1+PTRSHIFT)
// p12 = first probed bucket

// do {
4: ldp p17, p9, [x13], #-BUCKET_SIZE // {imp, sel} = *bucket--
cmp p9, p1 // if (sel == _cmd)
b.eq 2b // goto hit
cmp p9, #0 // } while (sel != 0 &&
ccmp p13, p12, #0, ne // bucket > first_probed)
b.hi 4b

LLookupEnd\Function:
LLookupRecover\Function:
b \MissLabelDynamic
// NOTE(review): this excerpt stops here; the LLookupPreopt\Function code and the closing
// .endmacro are not shown.

其中CACHE_MASK_STORAGE宏的定义如下:

1
2
3
4
5
6
7
8
9
10
11
// Selects which CACHE_MASK_STORAGE layout is used for the current target;
// CacheLookup has one branch per layout.
#if defined(__arm64__) && __LP64__
#if TARGET_OS_OSX || TARGET_OS_SIMULATOR
// 64-bit arm64 on macOS or the simulator.
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
#else
// All other 64-bit arm64 targets.
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#endif
#elif defined(__arm64__) && !__LP64__
// arm64 with 32-bit pointers.
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
// Every non-arm64 target.
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED
#endif

CacheHit

命中缓存后执行的是CacheHit:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// CacheHit: x17 = cached IMP, x10 = address of buckets, x1 = SEL, x16 = isa
// Expanded by CacheLookup when the selector is found; $0 is the lookup mode
// (NORMAL, GETIMP or LOOKUP) and decides what is done with the cached IMP.
.macro CacheHit
.if $0 == NORMAL
// NORMAL: jump straight into the cached implementation.
TailCallCachedImp x17, x10, x1, x16 // authenticate and call imp
.elseif $0 == GETIMP
// GETIMP: hand the IMP back in x0; a nil IMP is returned without authentication.
mov p0, p17
cbz p0, 9f // don't ptrauth a nil imp
AuthAndResignAsIMP x0, x10, x1, x16 // authenticate imp and re-sign as IMP
9: ret // return IMP
.elseif $0 == LOOKUP
// LOOKUP: hand the IMP back in x17.
// No nil check for ptrauth: the caller would crash anyway when they
// jump to a nil IMP. We don't care if that jump also fails ptrauth.
AuthAndResignAsIMP x17, x10, x1, x16 // authenticate imp and re-sign as IMP
// x15 holds the isa stashed at the start of CacheLookup; compare it with the current x16.
cmp x16, x15
cinc x16, x16, ne // x16 += 1 when x15 != x16 (for instrumentation ; fallback to the parent class)
ret // return imp via x17
.else
// Any other mode aborts assembly.
.abort oops
.endif
.endmacro

.macro TailCallCachedImp
// $0 = cached imp, $1 = address of cached imp, $2 = SEL, $3 = isa
// XOR the cached value with the isa; the result is the address branched to below
// (it is a jump target, not a hash index).
// NOTE(review): presumably this reverses an imp ^ class encoding applied when the
// bucket was written -- confirm against the cache insert code.
eor $0, $0, $3
// Branch to the implementation; br does not set a return address, so this is a tail call.
br $0
.endmacro

### 总结

通过代码的注释可以看出_objc_msgSend查找缓存的步骤是

  • 判断消息接收者(receiver)是否为空
  • 根据对象首地址找到isa
  • 通过对象的isa找到类class,存入p16
  • class首地址平移16字节找到cache
  • cache的低48位是buckets,高16位(cache >> 48)是mask
  • (_cmd ^ (_cmd >> 7)) & mask得到index
  • 通过buckets[index]找到起始bucket
  • 从bucket中取出imp和sel
  • 比较sel是否是传入的_cmd
  • 如果是,执行CacheHit,跳转到imp
  • 如果不是,继续向前遍历bucket查找(到达首地址后从末尾绕回);遇到sel为空则说明没有缓存
  • 找不到的话执行_objc_msgSend_uncached流程

下一篇我们继续探索找不到的情况也就是_objc_msgSend_uncached的流程。