0%

iOS 底层探索 - 类的加载

应用的加载回顾

上一章我们对应用的加载有了初步的认识,我们知道了

  • 系统调用 exec() 会将我们的应用映射到新的地址空间
  • 然后通过 dyld 进行加载、链接、初始化主程序和主程序所依赖的各种动态库
  • 最后在 initializeMainExecutable 方法中经过一系列初始化调用 notifySingle 函数,该函数会执行一个 load_images 的回调
  • 然后在 doModInitFunctions 函数内部会调用带有 __attribute__((constructor)) 标记的 C 函数
  • 然后 dyld 返回主程序的入口函数,开始进入主程序的 main 函数。在 main 函数执行之前,其实 dyld 还会在流程中初始化 libSystem,而 libSystem 又会去初始化 libDispatch,在 libDispatch 初始化方法里面又会有一步 _os_object_init,在 _os_object_init 内部就会调起 _objc_init。而对于 _objc_init 我们还需要继续探索,因为这里面会进行类的加载等一系列重要的工作。

探索_objc_init

首先来到 libObjc 源码的 _objc_init 方法处,你可以直接添加一个符号断点 _objc_init 或者全局搜索关键字来到这里:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/***********************************************************************
* _objc_init
* Bootstrap initialization. Registers our image notifier with dyld.
* Called by libSystem BEFORE library initialization time
*
* Runs its body at most once: a function-local static flag turns every
* later call into an immediate return. The one-time setup steps run in
* the fixed order written below and end by handing map_images,
* load_images and unmap_image to dyld.
**********************************************************************/
void _objc_init(void)
{
static bool initialized = false;
// Already initialized: return immediately.
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
environ_init();
tls_init();
static_init();
runtime_init();
exception_init();
#if __OBJC2__
cache_t::init();
#endif
_imp_implementationWithBlock_init();
// Register the mapped / init / unmapped callbacks with dyld.
_dyld_objc_notify_register(&map_images, load_images, unmap_image);
#if __OBJC2__
// Record that the dyld registration call above has been made.
didCallDyldNotifyRegister = true;
#endif
}

environ_init

我们直接看调用的第一个方法environ_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/***********************************************************************
* environ_init
* Read environment variables that affect the runtime.
* Also print environment variable help, if requested.
*
* Behavior, in order:
*  - Returns immediately (ignoring every variable) when issetugid()
*    reports a setuid/setgid process.
*  - Scans environ[] once. OBJC_HELP, OBJC_PRINT_OPTIONS and
*    OBJC_DEBUG_POOL_DEPTH are handled specially; every other OBJC_*
*    entry is matched against the Settings table and the matching
*    option's variable is set to true only when the value is exactly
*    "YES" (any other value sets it to false).
*  - Enables DebugPoolAllocation when certain malloc-debugging
*    variables are present and OBJC_DEBUG_POOL_ALLOCATION does not
*    say otherwise.
*  - Prints help text and/or the list of set options when requested.
**********************************************************************/
void environ_init(void)
{
    if (issetugid()) {
        // All environment variables are silently ignored when setuid or setgid
        // This includes OBJC_HELP and OBJC_PRINT_OPTIONS themselves.
        return;
    }

    // Turn off autorelease LRU coalescing by default for apps linked against
    // older SDKs. LRU coalescing can reorder releases and certain older apps
    // are accidentally relying on the ordering.
    // rdar://problem/63886091
    // if (!dyld_program_sdk_at_least(dyld_fall_2020_os_versions))
    // DisableAutoreleaseCoalescingLRU = true;

    bool PrintHelp = false;
    bool PrintOptions = false;
    bool maybeMallocDebugging = false;

    // Scan environ[] directly instead of calling getenv() a lot.
    // This optimizes the case where none are set.
    for (char **p = *_NSGetEnviron(); *p != nil; p++) {
        if (0 == strncmp(*p, "Malloc", 6) || 0 == strncmp(*p, "DYLD", 4) ||
            0 == strncmp(*p, "NSZombiesEnabled", 16))
        {
            maybeMallocDebugging = true;
        }

        // Everything below only concerns OBJC_* variables.
        if (0 != strncmp(*p, "OBJC_", 5)) continue;

        if (0 == strncmp(*p, "OBJC_HELP=", 10)) {
            PrintHelp = true;
            continue;
        }
        if (0 == strncmp(*p, "OBJC_PRINT_OPTIONS=", 19)) {
            PrintOptions = true;
            continue;
        }
        if (0 == strncmp(*p, "OBJC_DEBUG_POOL_DEPTH=", 22)) {
            SetPageCountWarning(*p + 22);
            continue;
        }

        // Split "NAME=value". strchr() returns NULL when the entry contains
        // no '=' at all (environment strings are not guaranteed to have
        // one), so test the pointer itself; dereferencing it first would
        // crash on such an entry.
        const char *value = strchr(*p, '=');
        if (!value) continue;
        value++;

        for (size_t i = 0; i < sizeof(Settings)/sizeof(Settings[0]); i++) {
            const option_t *opt = &Settings[i];
            // The name must match in full: same length as opt->env
            // (value - *p counts the name plus the '=') and same bytes.
            if ((size_t)(value - *p) == 1+opt->envlen &&
                0 == strncmp(*p, opt->env, opt->envlen))
            {
                *opt->var = (0 == strcmp(value, "YES"));
                break;
            }
        }
    }

    // Special case: enable some autorelease pool debugging
    // when some malloc debugging is enabled
    // and OBJC_DEBUG_POOL_ALLOCATION is not set to something other than NO.
    if (maybeMallocDebugging) {
        const char *insert = getenv("DYLD_INSERT_LIBRARIES");
        const char *zombie = getenv("NSZombiesEnabled");
        const char *pooldebug = getenv("OBJC_DEBUG_POOL_ALLOCATION");
        if ((getenv("MallocStackLogging")
             || getenv("MallocStackLoggingNoCompact")
             || (zombie && (*zombie == 'Y' || *zombie == 'y'))
             || (insert && strstr(insert, "libgmalloc")))
            &&
            (!pooldebug || 0 == strcmp(pooldebug, "YES")))
        {
            DebugPoolAllocation = true;
        }
    }

    // if (!os_feature_enabled_simple(objc4, preoptimizedCaches, true)) {
    // DisablePreoptCaches = true;
    // }

    // Print OBJC_HELP and OBJC_PRINT_OPTIONS output.
    if (PrintHelp || PrintOptions) {
        if (PrintHelp) {
            _objc_inform("Objective-C runtime debugging. Set variable=YES to enable.");
            _objc_inform("OBJC_HELP: describe available environment variables");
            if (PrintOptions) {
                _objc_inform("OBJC_HELP is set");
            }
            _objc_inform("OBJC_PRINT_OPTIONS: list which options are set");
        }
        if (PrintOptions) {
            _objc_inform("OBJC_PRINT_OPTIONS is set");
        }

        // One line of help per known option, and/or one line per option
        // whose variable is currently true.
        for (size_t i = 0; i < sizeof(Settings)/sizeof(Settings[0]); i++) {
            const option_t *opt = &Settings[i];
            if (PrintHelp) _objc_inform("%s: %s", opt->env, opt->help);
            if (PrintOptions && *opt->var) _objc_inform("%s is set", opt->env);
        }
    }
}

我们可以看到,这里主要是读取影响 Runtime 的一些环境变量,如果需要,还可以打印环境变量帮助提示。

我们可以在终端测试一下,直接输入export OBJC_HELP=1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
jason@192 ~ % export OBJC_HELP=1
jason@192 ~ % ls
objc[6097]: Objective-C runtime debugging. Set variable=YES to enable.
objc[6097]: OBJC_HELP: describe available environment variables
objc[6097]: OBJC_PRINT_OPTIONS: list which options are set
objc[6097]: OBJC_PRINT_IMAGES: log image and library names as they are loaded
objc[6097]: OBJC_PRINT_IMAGE_TIMES: measure duration of image loading steps
objc[6097]: OBJC_PRINT_LOAD_METHODS: log calls to class and category +load methods
objc[6097]: OBJC_PRINT_INITIALIZE_METHODS: log calls to class +initialize methods
objc[6097]: OBJC_PRINT_RESOLVED_METHODS: log methods created by +resolveClassMethod: and +resolveInstanceMethod:
objc[6097]: OBJC_PRINT_CLASS_SETUP: log progress of class and category setup
objc[6097]: OBJC_PRINT_PROTOCOL_SETUP: log progress of protocol setup
objc[6097]: OBJC_PRINT_IVAR_SETUP: log processing of non-fragile ivars
objc[6097]: OBJC_PRINT_VTABLE_SETUP: log processing of class vtables
objc[6097]: OBJC_PRINT_VTABLE_IMAGES: print vtable images showing overridden methods
objc[6097]: OBJC_PRINT_CACHE_SETUP: log processing of method caches
objc[6097]: OBJC_PRINT_FUTURE_CLASSES: log use of future classes for toll-free bridging
objc[6097]: OBJC_PRINT_PREOPTIMIZATION: log preoptimization courtesy of dyld shared cache
objc[6097]: OBJC_PRINT_CXX_CTORS: log calls to C++ ctors and dtors for instance variables
objc[6097]: OBJC_PRINT_EXCEPTIONS: log exception handling
objc[6097]: OBJC_PRINT_EXCEPTION_THROW: log backtrace of every objc_exception_throw()
objc[6097]: OBJC_PRINT_ALT_HANDLERS: log processing of exception alt handlers
objc[6097]: OBJC_PRINT_REPLACED_METHODS: log methods replaced by category implementations
objc[6097]: OBJC_PRINT_DEPRECATION_WARNINGS: warn about calls to deprecated runtime functions
objc[6097]: OBJC_PRINT_POOL_HIGHWATER: log high-water marks for autorelease pools
objc[6097]: OBJC_PRINT_CUSTOM_CORE: log classes with custom core methods
objc[6097]: OBJC_PRINT_CUSTOM_RR: log classes with custom retain/release methods
objc[6097]: OBJC_PRINT_CUSTOM_AWZ: log classes with custom allocWithZone methods
objc[6097]: OBJC_PRINT_RAW_ISA: log classes that require raw pointer isa fields
objc[6097]: OBJC_DEBUG_UNLOAD: warn about poorly-behaving bundles when unloaded
objc[6097]: OBJC_DEBUG_FRAGILE_SUPERCLASSES: warn about subclasses that may have been broken by subsequent changes to superclasses
objc[6097]: OBJC_DEBUG_NIL_SYNC: warn about @synchronized(nil), which does no synchronization
objc[6097]: OBJC_DEBUG_NONFRAGILE_IVARS: capriciously rearrange non-fragile ivars
objc[6097]: OBJC_DEBUG_ALT_HANDLERS: record more info about bad alt handler use
objc[6097]: OBJC_DEBUG_MISSING_POOLS: warn about autorelease with no pool in place, which may be a leak
objc[6097]: OBJC_DEBUG_POOL_ALLOCATION: halt when autorelease pools are popped out of order, and allow heap debuggers to track autorelease pools
objc[6097]: OBJC_DEBUG_DUPLICATE_CLASSES: halt when multiple classes with the same name are present
objc[6097]: OBJC_DEBUG_DONT_CRASH: halt the process by exiting instead of crashing
objc[6097]: OBJC_DISABLE_VTABLES: disable vtable dispatch
objc[6097]: OBJC_DISABLE_PREOPTIMIZATION: disable preoptimization courtesy of dyld shared cache
objc[6097]: OBJC_DISABLE_TAGGED_POINTERS: disable tagged pointer optimization of NSNumber et al.
objc[6097]: OBJC_DISABLE_TAG_OBFUSCATION: disable obfuscation of tagged pointers
objc[6097]: OBJC_DISABLE_NONPOINTER_ISA: disable non-pointer isa fields
objc[6097]: OBJC_DISABLE_INITIALIZE_FORK_SAFETY: disable safety checks for +initialize after fork

可以看到不同的环境变量对应的内容都被打印出来了。

tls_init

接着看tls_init方法内部:

1
2
3
4
5
6
7
8
// Sets up the runtime's thread-local-storage key and associates
// _objc_pthread_destroyspecific with it (presumably the per-thread
// destructor callback; confirm against the pthread key API).
// Exactly one of the two branches is compiled in.
void tls_init(void)
{
#if SUPPORT_DIRECT_THREAD_KEYS
// Direct-key configuration: initialize the fixed key TLS_DIRECT_KEY.
pthread_key_init_np(TLS_DIRECT_KEY, &_objc_pthread_destroyspecific);
#else
// Otherwise obtain a key from tls_create() and keep it in _objc_pthread_key.
_objc_pthread_key = tls_create(&_objc_pthread_destroyspecific);
#endif
}

这里执行的是关于线程 key 的绑定,比如每个线程数据的析构函数。

static_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/***********************************************************************
* static_init
* Run C++ static constructor functions.
* libc calls _objc_init() before dyld would call our static constructors,
* so we have to do it ourselves.
**********************************************************************/
static void static_init()
{
size_t count;
auto inits = getLibobjcInitializers(&_mh_dylib_header, &count);
for (size_t i = 0; i < count; i++) {
inits[i]();
}
auto offsets = getLibobjcInitializerOffsets(&_mh_dylib_header, &count);
for (size_t i = 0; i < count; i++) {
UnsignedInitializer init(offsets[i]);
init();
}
}

这里会运行 C++ 的静态构造函数,在 dyld 调用我们的静态构造函数之前,libc 会调用 _objc_init,所以这里我们必须自己来做,并且这里只会初始化系统内置的 C++ 静态构造函数,我们自己代码里面写的并不会在这里初始化。

runtime_init

1
2
3
4
5
// Initializes two runtime-wide tables in the objc namespace:
// unattachedCategories and allocatedClasses.
void runtime_init(void)
{
// 32 is passed to init(); presumably an initial capacity (TODO confirm).
objc::unattachedCategories.init(32);
objc::allocatedClasses.init();
}

Runtime环境初始化,unattachedCategories和allocatedClasses两张表的初始化工作

exception_init

1
2
3
4
5
6
7
8
9
/***********************************************************************
* exception_init
* Initialize libobjc's exception handling system.
* Called from _objc_init() in the version of the runtime shown here
* (older revisions of this comment said map_images()).
*
* Installs _objc_terminate as the std::terminate handler and saves the
* previously installed handler in old_terminate so it can be chained to.
**********************************************************************/
void exception_init(void)
{
old_terminate = std::set_terminate(&_objc_terminate);
}

这里是初始化 libobjc 的异常处理系统,我们程序触发的异常都会来到:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/***********************************************************************
* _objc_terminate
* Custom std::terminate handler.
*
* The uncaught exception callback is implemented as a std::terminate handler.
* 1. Check if there's an active exception
* 2. If so, check if it's an Objective-C exception
* 3. If so, call our registered callback with the object.
* 4. Finally, call the previous terminate handler.
*
* Every path ends by calling old_terminate, the handler that was
* installed before this one.
**********************************************************************/
static void (*old_terminate)(void) = nil;
static void _objc_terminate(void)
{
// Optional diagnostic, controlled by the PrintExceptions flag.
if (PrintExceptions) {
_objc_inform("EXCEPTIONS: terminating");
}

if (! __cxa_current_exception_type()) {
// No current exception.
(*old_terminate)();
}
else {
// There is a current exception. Check if it's an objc exception.
// Rethrowing it lets the @catch clauses below classify it by type.
@try {
__cxa_rethrow();
} @catch (id e) {
// It's an objc object. Call Foundation's handler, if any.
(*uncaught_handler)((id)e);
(*old_terminate)();
} @catch (...) {
// It's not an objc object. Continue to C++ terminate.
(*old_terminate)();
}
}
}

我们可以看到 _objc_terminate 是未处理异常的回调函数,其内部逻辑如下:

  • 检查是否是一个活跃的异常
  • 如果是活跃的异常,检查是否是 OC 抛出的异常
  • 如果是 OC 抛出的异常,调用 uncaught_handler 回调函数指针
  • 如果不是 OC 抛出的异常,则继续 C++ 终止操作

cache_t::init()

从名字很容易猜到,这里做的是 cache_t 相关的初始化

_imp_implementationWithBlock_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/// Initialize the trampoline machinery. Normally this does nothing, as
/// everything is initialized lazily, but for certain processes we eagerly load
/// the trampolines dylib.
///
/// The whole body is guarded by TARGET_OS_OSX, so on other targets this
/// function compiles to an empty body. On macOS it acts only when the
/// program name is exactly "QtWebEngineProcess" or "Steam Helper".
void
_imp_implementationWithBlock_init(void)
{
#if TARGET_OS_OSX
// Eagerly load libobjc-trampolines.dylib in certain processes. Some
// programs (most notably QtWebEngineProcess used by older versions of
// embedded Chromium) enable a highly restrictive sandbox profile which
// blocks access to that dylib. If anything calls
// imp_implementationWithBlock (as AppKit has started doing) then we'll
// crash trying to load it. Loading it here sets it up before the sandbox
// profile is enabled and blocks it.
//
// This fixes EA Origin (rdar://problem/50813789)
// and Steam (rdar://problem/55286131)
// __progname is checked for null before it is compared.
if (__progname &&
(strcmp(__progname, "QtWebEngineProcess") == 0 ||
strcmp(__progname, "Steam Helper") == 0)) {
Trampolines.Initialize();
}
#endif
}

初始化 trampoline(跳板)机制。通常不会做什么,因为所有的初始化都是惰性的,但是对于某些进程,我们会提前(eagerly)加载 trampolines dylib。

_dyld_objc_notify_register

接下来是我们今天探索的重点了: _dyld_objc_notify_register ,我们先看下它的定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
//
// Note: only for use by objc runtime
// Register handlers to be called when objc images are mapped, unmapped, and initialized.
// Dyld will call back the "mapped" function with an array of images that contain an objc-image-info section.
// Those images that are dylibs will have the ref-counts automatically bumped, so objc will no longer need to
// call dlopen() on them to keep them from being unloaded. During the call to _dyld_objc_notify_register(),
// dyld will call the "mapped" function with already loaded objc images. During any later dlopen() call,
// dyld will also call the "mapped" function. Dyld will call the "init" function when dyld would be calling
// initializers in that image. This is when objc calls any +load methods in that image.
//
// Parameters:
//   mapped   - called with a batch of images that have been mapped
//   init     - called for one image at the point its initializers would run
//   unmapped - called for one image when it is unmapped
//
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped);

注意:仅供 objc 运行时使用
objc 镜像被映射(mapped)、卸载(unmapped)、初始化(initialized)的时候,注册的回调函数就会被调用。
这个方法是 dyld 中声明的,一旦调用该方法,dyld 会把相关信息作为回调函数的参数回传回来。比如,dyld 会把所有包含 objc-image-info section 的 images 以数组的形式传给 mapped 回调。
load 方法也将在这个方法中被调用。

_dyld_objc_notify_register 方法的三个参数 map_images、load_images、unmap_image 其实都是函数指针:

1
2
3
// "mapped" callback: receives a batch of `count` images as two parallel arrays (paths and Mach-O headers).
typedef void (*_dyld_objc_notify_mapped)(unsigned count, const char* const paths[], const struct mach_header* const mh[]);
// "init" callback: receives a single image (path and Mach-O header).
typedef void (*_dyld_objc_notify_init)(const char* path, const struct mach_header* mh);
// "unmapped" callback: receives a single image (path and Mach-O header).
typedef void (*_dyld_objc_notify_unmapped)(const char* path, const struct mach_header* mh);

这三个函数指针是在 dyld 中回调的,我们打开 dyld 的源码即可一探究竟,我们直接搜索 _dyld_objc_notify_register :

1
2
3
4
5
6
// Public entry point used by the ObjC runtime; forwards the three
// callbacks unchanged to dyld::registerObjCNotifiers().
void _dyld_objc_notify_register(_dyld_objc_notify_mapped    mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
dyld::registerObjCNotifiers(mapped, init, unmapped);
}

接着来到 dyld 的 registerObjCNotifiers 方法内部:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Stores the three callbacks supplied by the ObjC runtime in dyld's static
// slots, then brings the runtime up to date with images dyld has already
// processed: first the "mapped" notification for everything mapped so far,
// then the "init" notification for every image that is already initialized.
void registerObjCNotifiers(_dyld_objc_notify_mapped mapped, _dyld_objc_notify_init init, _dyld_objc_notify_unmapped unmapped)
{
// record functions to call
// (the trailing names are the functions libobjc passes in from _objc_init)
sNotifyObjCMapped = mapped;//map_images
sNotifyObjCInit = init;//load_images
sNotifyObjCUnmapped = unmapped;//unmap_image

// call 'mapped' function with all images mapped so far
try {
notifyBatchPartial(dyld_image_state_bound, true, NULL, false, true);
}
catch (const char* msg) {
// ignore request to abort during registration
}

// <rdar://problem/32209809> call 'init' function on all images already init'ed (below libSystem)
for (std::vector<ImageLoader*>::iterator it=sAllImages.begin(); it != sAllImages.end(); it++) {
ImageLoader* image = *it;
// Only images that are already initialized and that notify objc qualify.
if ( (image->getState() == dyld_image_state_initialized) && image->notifyObjC() ) {
dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
(*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
}
}
}
1
2
3
// File-static slots holding the callbacks recorded by registerObjCNotifiers().
static _dyld_objc_notify_mapped		sNotifyObjCMapped;
static _dyld_objc_notify_init sNotifyObjCInit;
static _dyld_objc_notify_unmapped sNotifyObjCUnmapped;

通过上面的代码的内容说明在registerObjCNotifiers 内部, libObjc 传过来的这三个函数指针被 dyld 保存在了本地静态变量中。换句话来说,最终函数指针是否能被调用,取决于这三个静态变量:

  • sNotifyObjCMapped
  • sNotifyObjCInit
  • sNotifyObjCUnmapped

我们注意到 registerObjCNotifiers 的 try-catch 语句中的 try 分支注释如下:

call ‘mapped’ function with all images mapped so far
对目前为止已经映射的所有镜像调用 mapped 函数

那么也就是说 notifyBatchPartial 里面会进行真正的函数指针的调用,我们进入这个方法内部:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Excerpt: only the part that notifies the ObjC runtime is shown; the code
// before and after it has been omitted, so the braces here do not balance.
// The shown part collects the path and mach_header of every image that has
// ObjC content and has not been reported yet, then hands the whole batch to
// the registered "mapped" callback in a single call.
static void notifyBatchPartial(dyld_image_states state, bool orLater, dyld_image_state_change_handler onlyHandler, bool preflightOnly, bool onlyObjCMappedNotification)
{
// (code omitted)
// tell objc about new images
if ( (onlyHandler == NULL) && ((state == dyld_image_state_bound) || (orLater && (dyld_image_state_bound > state))) && (sNotifyObjCMapped != NULL) ) {
const char* paths[imageCount];
const mach_header* mhs[imageCount];
unsigned objcImageCount = 0;
for (int i=0; i < imageCount; ++i) {
ImageLoader* image = findImageByMachHeader(infos[i].imageLoadAddress);
bool hasObjC = false;
if ( image != NULL ) {
// Skip images the runtime has already been told about.
if ( image->objCMappedNotified() )
continue;
hasObjC = image->notifyObjC();
}
#if SUPPORT_ACCELERATE_TABLES
else if ( sAllCacheImagesProxy != NULL ) {
const mach_header* mh;
const char* path;
unsigned index;
if ( sAllCacheImagesProxy->addressInCache(infos[i].imageLoadAddress, &mh, &path, &index) ) {
hasObjC = (mh->flags & MH_HAS_OBJC);
}
}
#endif
if ( hasObjC ) {
paths[objcImageCount] = infos[i].imageFilePath;
mhs[objcImageCount] = infos[i].imageLoadAddress;
++objcImageCount;
// Mark the image so it is not reported a second time.
if ( image != NULL )
image->setObjCMappedNotified();
}
}
if ( objcImageCount != 0 ) {
dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_MAP, 0, 0, 0);
uint64_t t0 = mach_absolute_time();
(*sNotifyObjCMapped)(objcImageCount, paths, mhs);// this is where the registered 'mapped' function pointer is actually invoked
uint64_t t1 = mach_absolute_time();
// Accumulate the time spent inside the callback.
ImageLoader::fgTotalObjCSetupTime += (t1-t0);
}
}
}
// (code omitted)
}

我们可以看到,在 notifyBatchPartial 方法内部,这里的注释:

tell objc about new images 告诉 objc 有新的镜像被映射进来了

弄清楚了三个函数指针是怎么调用的还不够,接下来我们要深入各个函数的内部看里面究竟做了什么样的事情。

探索 map_images

首先是 map_images ,我们来到它的实现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/***********************************************************************
* map_images
* The "mapped" callback that _objc_init hands to dyld. Handles the
* images dyld is mapping in by taking the ABI-specific lock and then
* delegating to the ABI-agnostic map_images_nolock().
*
* count - number of images in this batch
* paths - file path of each image
* mhdrs - Mach-O header of each image
*
* Locking: write-locks runtimeLock
**********************************************************************/
void
map_images(unsigned count, const char * const paths[],
           const struct mach_header * const mhdrs[])
{
    // runtimeLock is held through the mutex_locker_t object for the
    // remainder of this function.
    mutex_locker_t runtimeGuard(runtimeLock);
    map_images_nolock(count, paths, mhdrs);
}

Process the given images which are being mapped in by dyld.
Calls ABI-agnostic code after taking ABI-specific locks.

处理由 dyld 映射的给定镜像
取得特定于 ABI 的锁后,调用与 ABI 无关的代码。

这里会继续往下走到 map_images_nolock

map_images_nolock 内部代码十分冗长,我们经过分析之后,前面的工作基本上都是进行镜像文件信息的提取与统计,所以可以定位到最后的 _read_images

1
2
3
if (hCount > 0) {
_read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
}

这里进入 _read_images 的条件是 hCount 大于 0, hCount 表示的是 Mach-O header 的数量

OK,我们的主角登场了, _read_images 和 lookUpImpOrForward 可以说是我们学习 Runtime 和 iOS 底层里面非常重要的两个概念了, lookUpImpOrForward 已经探索过了,剩下的 _read_images 我们也不能落下。

_read_images定义

Perform initial processing of the headers in the linked list beginning with headerList.
从 headerList 开始,对已经链接了的 Mach-O 镜像表中的头部进行初始化处理

我们可以看到,整个 _read_images 有接近 400 行代码。

通过查看代码,以及日志打印提示信息,我们大致可以将 _read_images 分为下面几个流程:

_read_images具体流程

doneOnce 流程

我们从第一个分支 doneOnce 开始,这个名词顾名思义,只会执行一次:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#define EACH_HEADER \
hIndex = 0; \
hIndex < hCount && (hi = hList[hIndex]); \
hIndex++
if (!doneOnce) {
doneOnce = YES;
launchTime = YES;
#if SUPPORT_NONPOINTER_ISA
// Disable non-pointer isa under some conditions.

# if SUPPORT_INDEXED_ISA
// Disable nonpointer isa if any image contains old Swift code
for (EACH_HEADER) {
if (hi->info()->containsSwift() &&
hi->info()->swiftUnstableVersion() < objc_image_info::SwiftVersion3)
{
DisableNonpointerIsa = true;
if (PrintRawIsa) {
_objc_inform("RAW ISA: disabling non-pointer isa because "
"the app or a framework contains Swift code "
"older than Swift 3.0");
}
break;
}
}
# endif
  • 通过宏 SUPPORT_NONPOINTER_ISA 判断当前是否支持开启内存优化的 isa
    • 如果支持,则在某些条件下需要禁用这个优化
  • 通过宏 SUPPORT_INDEXED_ISA 判断当前是否是将类存储在 isa 作为类表索引
    • 如果是的话,再遍历所有的 Mach-O 的头部(这里是循环遍历,并非递归),并且判断如果包含 Swift 3.0 之前的代码,就需要禁用对 isa 的内存优化
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# if TARGET_OS_OSX
// Disable non-pointer isa if the app is too old
// (linked before OS X 10.11)
// if (!dyld_program_sdk_at_least(dyld_platform_version_macOS_10_11)) {
// DisableNonpointerIsa = true;
// if (PrintRawIsa) {
// _objc_inform("RAW ISA: disabling non-pointer isa because "
// "the app is too old.");
// }
// }

// Disable non-pointer isa if the app has a __DATA,__objc_rawisa section
// New apps that load old extensions may need this.
for (EACH_HEADER) {
if (hi->mhdr()->filetype != MH_EXECUTE) continue;
unsigned long size;
if (getsectiondata(hi->mhdr(), "__DATA", "__objc_rawisa", &size)) {
DisableNonpointerIsa = true;
if (PrintRawIsa) {
_objc_inform("RAW ISA: disabling non-pointer isa because "
"the app has a __DATA,__objc_rawisa section");
}
}
break; // assume only one MH_EXECUTE image
}
# endif
  • 通过宏 TARGET_OS_OSX 判断是否是 macOS 执行环境
  • 判断 app 链接时所用的 SDK 版本,如果早于 OS X 10.11 则说明 app 太陈旧了,需要禁用掉 non-pointer isa(注意:在上面这份源码中,这段判断已经被注释掉了)
  • 然后再遍历所有的 Mach-O 的头部,找到类型为 MH_EXECUTE 的可执行文件,判断如果有 __DATA,__objc_rawisa 段的存在,则禁用掉 non-pointer isa ,因为新的 app 加载老的扩展的时候可能会需要这样的判断操作。
1
2
3
4
5
6
7
// namedClasses
// Preoptimized classes don't go in this table.
// 4/3 is NXMapTable's load factor
int namedClassesSize =
(isPreoptimized() ? unoptimizedTotalClasses : totalClasses) * 4 / 3;
gdb_objc_realized_classes =
NXCreateMapTable(NXStrValueMapPrototype, namedClassesSize);

预先优化过的类不会加入到 gdb_objc_realized_classes 这个哈希表中来, gdb_objc_realized_classes 哈希表的装载因子为 0.75,这是一个经过验证的效率很高的扩容临界值。

  • 加载所有类到类的 gdb_objc_realized_classes 表中来
1
2
3
4
5
// This is a misnomer: gdb_objc_realized_classes is actually a list of 
// named classes not in the dyld shared cache, whether realized or not.
// This list excludes lazily named classes, which have to be looked up
// using a getClass hook.
NXMapTable *gdb_objc_realized_classes; // exported for debuggers in objc-gdb.h

这是一个误称:gdb_objc_realized_classes 表实际上存储的是不在 dyld 共享缓存里面的命名类,无论这些类是否实现

除了 gdb_objc_realized_classes 表之外,还有一张表 allocatedClasses :

其实 gdb_objc_realized_classes 和 allocatedClasses 是一种包含的关系,一张是类的总表,一张是已经开辟了内存的类表.

Discover classes 流程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Discover classes. Fix up unresolved future classes. Mark bundle classes.
bool hasDyldRoots = dyld_shared_cache_some_image_overridden();
for (EACH_HEADER) {
if (! mustReadClasses(hi, hasDyldRoots)) {
// Image is sufficiently optimized that we need not call readClass()
continue;
}
classref_t const *classlist = _getObjc2ClassList(hi, &count);

bool headerIsBundle = hi->isBundle();
bool headerIsPreoptimized = hi->hasPreoptimizedClasses();

for (i = 0; i < count; i++) {
Class cls = (Class)classlist[i];
Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);

if (newCls != cls && newCls) {
// Class was moved but not deleted. Currently this occurs
// only when the new class resolved a future class.
// Non-lazily realize the class below.
resolvedFutureClasses = (Class *)
realloc(resolvedFutureClasses,
(resolvedFutureClassCount+1) * sizeof(Class));
resolvedFutureClasses[resolvedFutureClassCount++] = newCls;
}
}
}

Discover classes. Fix up unresolved future classes. Mark bundle classes.
发现类。修正未解析的 future 类,标记 bundle 类。

  • 先通过 _getObjc2ClassList 来获取到所有的类
  • 接着还是遍历所有的 Mach-O 的 header 部分,然后通过 mustReadClasses 来判断哪些条件可以跳过读取类这一步骤
  • 读取 header 是否是 Bundle
  • 读取 header 是否开启了 预优化
  • 遍历 _getObjc2ClassList 取出的所有的类
    • 通过 readClass 来读取类信息
    • 判断如果 readClass 的返回值 newCls 与 cls 不相等并且不为空,说明该类解析了一个 future 类:此时通过 realloc 扩容 resolvedFutureClasses 数组,并把 newCls 追加进去,留待后面统一初始化

Fix up remapped classes 流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Fix up remapped classes
// Class list and nonlazy class list remain unremapped.
// Class refs and super refs are remapped for message dispatching.

if (!noClassesRemapped()) {
for (EACH_HEADER) {
Class *classrefs = _getObjc2ClassRefs(hi, &count);
for (i = 0; i < count; i++) {
remapClassRef(&classrefs[i]);
}
// fixme why doesn't test future1 catch the absence of this?
classrefs = _getObjc2SuperRefs(hi, &count);
for (i = 0; i < count; i++) {
remapClassRef(&classrefs[i]);
}
}
}

修复 重映射类
类表和非懒加载类表没有被重映射 (也就是 _objc_classlist)
为了消息派发(message dispatching),类引用和父类引用会被重映射 (也就是 _objc_classrefs)

  • 通过 noClassesRemapped 方法判断是否有类引用(_objc_classrefs)需要进行重映射
    • 如果需要,则遍历 EACH_HEADER
    • 通过 _getObjc2ClassRefs 和 _getObjc2SuperRefs 取出当前遍历到的 Mach-O 的类引用和父类引用,然后调用 remapClassRef 进行重映射

Fix up @selector references 流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
    // Fix up @selector references
static size_t UnfixedSelectors;
{
mutex_locker_t lock(selLock);
for (EACH_HEADER) {
if (hi->hasPreoptimizedSelectors()) continue;

bool isBundle = hi->isBundle();
SEL *sels = _getObjc2SelectorRefs(hi, &count);
UnfixedSelectors += count;
for (i = 0; i < count; i++) {
const char *name = sel_cname(sels[i]);
SEL sel = sel_registerNameNoLock(name, isBundle);
if (sels[i] != sel) {
sels[i] = sel;
}
}
}
}
// Registers `name` as a selector without taking selLock: forwards to
// __sel_registerName() with shouldLock = 0 and passes `copy` through.
SEL sel_registerNameNoLock(const char *name, bool copy) {
return __sel_registerName(name, 0, copy); // NO lock, maybe copy
}

修正 SEL 引用

  • 操作前先加一个 selLock
  • 然后遍历EACH_HEADER
    • 如果开启了预优化,continue 到下一个 Mach-O
    • 通过 _getObjc2SelectorRefs 拿到所有的 SEL 引用
    • 然后对所有的 SEL 引用调用 sel_registerNameNoLock 进行注册

也就是说这一流程最主要的目的就是注册 SEL ,注册真正发生的地方是 __sel_registerName ,这个函数如果大家经常玩 Runtime 肯定不会陌生:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Returns the SEL registered for `name`, registering it first if needed.
//   name       - selector name; a null name returns (SEL)0
//   shouldLock - true: selLock must not be held on entry and is passed to
//                the conditional locker below; false: selLock must already
//                be held by the caller
//   copy       - forwarded to sel_alloc() when a new selector is created
//                (presumably controls whether the name is duplicated; TODO confirm)
static SEL __sel_registerName(const char *name, bool shouldLock, bool copy) 
{
SEL result = 0;

// Check that the caller's locking state matches what it claimed.
if (shouldLock) selLock.assertUnlocked();
else selLock.assertLocked();

if (!name) return (SEL)0;

// Built-in selectors are looked up first, before any locking.
result = search_builtins(name);
if (result) return result;

conditional_mutex_locker_t lock(selLock, shouldLock);
// insert() returns a pair whose .second is true only when `name` was newly added.
auto it = namedSelectors.get().insert(name);
if (it.second) {
// No match. Insert.
*it.first = (const char *)sel_alloc(name, copy);
}
return (SEL)*it.first;
}

我们简单分析一下 __sel_registerName 方法的流程:

  • 判断是否要加锁
  • 如果 name 为空,则返回一个空的 SEL
  • 在 builtins 中搜索,看是否已经注册过,如果找到,直接返回结果
  • 在 namedSelectors 哈希表中查询,找到了就返回结果
  • 如果 namedSelectors 未初始化,则创建一下这个哈希表
  • 如果上面的流程都没有找到,则需要调用 sel_alloc 来创建一下 SEL ,然后把新创建的 SEL 插入哈希表中进行缓存的填充
Fix up old objc_msgSend_fixup call sites 流程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#if SUPPORT_FIXUP
// Fix up old objc_msgSend_fixup call sites
for (EACH_HEADER) {
message_ref_t *refs = _getObjc2MessageRefs(hi, &count);
if (count == 0) continue;
if (PrintVtables) {
_objc_inform("VTABLES: repairing %zu unsupported vtable dispatch "
"call sites in %s", count, hi->fname());
}
for (i = 0; i < count; i++) {
fixupMessageRef(refs+i);
}
}
ts.log("IMAGE TIMES: fix up objc_msgSend_fixup");
#endif

修正旧的 objc_msgSend_fixup 调用

这个流程的执行前提是 FIXUP 被开启。

  • 还是老套路,遍历EACH_HEADER
    • 通过 _getObjc2MessageRefs 方法来获取当前遍历到的 Mach-O 镜像的所有消息引用
    • 然后遍历这些消息引用,然后调用 fixupMessageRef 进行修正
Discover protocols 流程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Discover protocols. Fix up protocol refs.
for (EACH_HEADER) {
extern objc_class OBJC_CLASS_$_Protocol;
Class cls = (Class)&OBJC_CLASS_$_Protocol;
ASSERT(cls);
NXMapTable *protocol_map = protocols();
bool isPreoptimized = hi->hasPreoptimizedProtocols();

// Skip reading protocols if this is an image from the shared cache
// and we support roots
// Note, after launch we do need to walk the protocol as the protocol
// in the shared cache is marked with isCanonical() and that may not
// be true if some non-shared cache binary was chosen as the canonical
// definition
if (launchTime && isPreoptimized) {
if (PrintProtocols) {
_objc_inform("PROTOCOLS: Skipping reading protocols in image: %s",
hi->fname());
}
continue;
}
bool isBundle = hi->isBundle();
protocol_t * const *protolist = _getObjc2ProtocolList(hi, &count);
for (i = 0; i < count; i++) {
readProtocol(protolist[i], cls, protocol_map,
isPreoptimized, isBundle);
}
}

发现协议,并修正协议引用

Fix up @protocol references 流程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Fix up @protocol references
// Preoptimized images may have the right
// answer already but we don't know for sure.
for (EACH_HEADER) {
// At launch time, we know preoptimized image refs are pointing at the
// shared cache definition of a protocol. We can skip the check on
// launch, but have to visit @protocol refs for shared cache images
// loaded later.
if (launchTime && hi->isPreoptimized())
continue;
protocol_t **protolist = _getObjc2ProtocolRefs(hi, &count);
for (i = 0; i < count; i++) {
remapProtocolRef(&protolist[i]);
}
}

对所有的协议做重映射

Realize non-lazy classes 流程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Realize non-lazy classes (for +load methods and static instances)
for (EACH_HEADER) {
classref_t const *classlist = hi->nlclslist(&count);
for (i = 0; i < count; i++) {
Class cls = remapClass(classlist[i]);
if (!cls) continue;

addClassTableEntry(cls);

if (cls->isSwiftStable()) {
if (cls->swiftMetadataInitializer()) {
_objc_fatal("Swift class %s with a metadata initializer "
"is not allowed to be non-lazy",
cls->nameForLogging());
}
// fixme also disallow relocatable classes
// We can't disallow all Swift classes because of
// classes like Swift.__EmptyArrayStorage
}
realizeClassWithoutSwift(cls, nil);
}
}

初始化非懒加载类( +load 方法和静态实例)

Realize newly-resolved future classes 流程
1
2
3
4
5
6
7
8
9
10
11
12
// Realize newly-resolved future classes, in case CF manipulates them
if (resolvedFutureClasses) {
for (i = 0; i < resolvedFutureClassCount; i++) {
Class cls = resolvedFutureClasses[i];
if (cls->isSwiftStable()) {
_objc_fatal("Swift class is not allowed to be future");
}
realizeClassWithoutSwift(cls, nil);
cls->setInstancesRequireRawIsaRecursively(false/*inherited*/);
}
free(resolvedFutureClasses);
}

初始化新解析出来的 future 类

Discover categories 流程
1
2
3
4
5
6
7
8
9
// Discover categories. Only do this after the initial category
// attachment has been done. For categories present at startup,
// discovery is deferred until the first load_images call after
// the call to _dyld_objc_notify_register completes. rdar://problem/53119145
if (didInitialAttachCategories) {
for (EACH_HEADER) {
load_categories_nolock(hi);
}
}

处理所有的分类,包括类和元类

到这里, _read_images 的流程就分析完毕,我们可以新建一个文件来去掉一些干扰的信息,只保留核心的逻辑,这样从宏观的角度来分析更直观:

_read_image_schedule

Q & A 环节
Q: dyld 主要逻辑是加载库,也就是镜像文件,但是加载完是怎么读取的呢?
A: _read_images 是真正读取的地方

Q: SEL 方法编号何时加载?
A: _read_images

readClass 分析

我们探索了 _read_images 方法的流程,接下来让我们把目光放到本文的主题 - 类的加载
既然是类的加载,那么我们在前面所探索的类的结构中出现的内容都会一一重现。
所以我们不妨直接进行断点调试,让我们略过其它干扰信息,聚焦于类的加载。

  • 根据上一小节我们探索的结果, doneOnce 流程中会创建两个哈希表,并没有涉及到类的加载,所以我们跳过
  • 我们来到第二个流程 - 类处理

我们在_read_images函数,定位到下面这行代码:

1
2
Class cls = (Class)classlist[i];//断点
Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);

可以看到 cls 的属性、方法、协议以及类名都为空,说明这里类并没有被真正加载完成,我们接着聚焦到 readClass 函数上面,我们进入其内部实现,我们大致浏览之后会定位到如下所示的代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/***********************************************************************
* readClass
* Read a class and metaclass as written by a compiler.
* Returns the new class pointer. This could be:
* - cls
* - nil (cls has a missing weak-linked superclass)
* - something else (space for this class was reserved by a future class)
*
* Parameters:
* - cls: the class to read.
* - headerIsBundle: when true, cls and its metaclass are flagged
*   RO_FROM_BUNDLE before returning.
* - headerIsPreoptimized: when true and no future class was replaced,
*   the class is not added to the named-class table or the class table
*   here; only an assertion about the name lookup is made.
*
* Note that all work performed by this function is preflighted by
* mustReadClasses(). Do not change this function without updating that one.
*
* Locking: runtimeLock acquired by map_images or objc_readClassPair
**********************************************************************/
Class readClass(Class cls, bool headerIsBundle, bool headerIsPreoptimized)
{
// May be nullptr; both branches further down handle the missing-name case.
const char *mangledName = cls->nonlazyMangledName();

if (missingWeakSuperclass(cls)) {
// No superclass (probably weak-linked).
// Disavow any knowledge of this subclass.
if (PrintConnecting) {
_objc_inform("CLASS: IGNORING class '%s' with "
"missing weak-linked superclass",
cls->nameForLogging());
}
// Remap cls to nil and clear its superclass before bailing out.
addRemappedClass(cls, nil);
cls->setSuperclass(nil);
return nil;
}

cls->fixupBackwardDeployingStableSwift();

// Set to the original cls if a future class takes its place below.
Class replacing = nil;
if (mangledName != nullptr) {
if (Class newCls = popFutureNamedClass(mangledName)) {
// This name was previously allocated as a future class.
// Copy objc_class to future class's struct.
// Preserve future's rw data block.

if (newCls->isAnySwift()) {
_objc_fatal("Can't complete future class request for '%s' "
"because the real class is too big.",
cls->nameForLogging());
}

// Capture the future class's rw and its current ro BEFORE the memcpy
// below overwrites newCls's fields with those of cls.
class_rw_t *rw = newCls->data();
const class_ro_t *old_ro = rw->ro();
memcpy(newCls, cls, sizeof(objc_class));

// Manually set address-discriminated ptrauthed fields
// so that newCls gets the correct signatures.
newCls->setSuperclass(cls->getSuperclass());
newCls->initIsa(cls->getIsa());

// After the memcpy, newCls->data() is the data pointer copied from cls;
// install it as the preserved rw's ro, then point newCls back at that rw.
rw->set_ro((class_ro_t *)newCls->data());
newCls->setData(rw);
// Release the future class's previous ro and, via freeIfMutable, its name.
freeIfMutable((char *)old_ro->getName());
free((void *)old_ro);

addRemappedClass(cls, newCls);

// From here on operate on the future class; `replacing` remembers
// the compiler-emitted class it stands in for.
replacing = cls;
cls = newCls;
}
}

if (headerIsPreoptimized && !replacing) {
// class list built in shared cache
// fixme strict assert doesn't work because of duplicates
// ASSERT(cls == getClass(name));
ASSERT(mangledName == nullptr || getClassExceptSomeSwift(mangledName));
} else {
if (mangledName) { //some Swift generic classes can lazily generate their names
addNamedClass(cls, mangledName, replacing);
} else {
// No name available yet: check that the metaclass's ro points back at cls.
Class meta = cls->ISA();
const class_ro_t *metaRO = meta->bits.safe_ro();
ASSERT(metaRO->getNonMetaclass() && "Metaclass with lazy name must have a pointer to the corresponding nonmetaclass.");
ASSERT(metaRO->getNonMetaclass() == cls && "Metaclass nonmetaclass pointer must equal the original class.");
}
// Done for both named and lazily-named classes in this branch.
addClassTableEntry(cls);
}

// for future reference: shared cache never contains MH_BUNDLEs
if (headerIsBundle) {
cls->data()->flags |= RO_FROM_BUNDLE;
cls->ISA()->data()->flags |= RO_FROM_BUNDLE;
}

return cls;
}

看起来类的信息在这里完成了加载,那么为了验证我们的猜想,直接断点调试一下,但发现断点根本走不进来,原因在于这里的判断语句:

1
if (Class newCls = popFutureNamedClass(mangledName))

判断当前传入的类的类名是否有 future 类的实现,但是我们刚才已经打印了,类名是空的,所以肯定不会执行这里。我们接着往下走:

  • addNamedClass 内部其实是将 cls 插入到 gdb_objc_realized_classes
  • addClassTableEntry 内部是将 cls 插入到 allocatedClasses

分析完 readClass ,我们回到 _read_images 方法

我们可以看到 readClass 返回的 newCls 会进行一个判断,判断与传入 readClass 之前的 cls 是否相等,而在 readClass 内部只有一个地方对类的内容进行了改动,但是我们刚才测试了是进不去的,所以这个 if 里面的内容我们可以略过,也就是说 resolvedFutureClasses 的内容我们都可以暂时略过。

总结一下 readClass

  • 判断是不是要后期处理的类
    • 如果是的话,就取出后期处理的类,读取这个类的 data() 来设置 ro/rw
  • addNamedClass 插入总表
  • addClassTableEntry 插入已开辟内存的类的表

realizeClassWithoutSwift 分析

通过分析 readClass ,我们可以得知,类已经被注册到两个哈希表中去了,那么现在一切时机都已经成熟了。但是我们还是要略过像 Fix up remapped classes、Fix up @selector references、fix up old objc_msgSend_fixup call sites、Discover protocols. Fix up protocol refs、Fix up @protocol references 这些流程,因为我们的重点是类的加载。我们最终来到了 Realize non-lazy classes (for +load methods and static instances) ,略去无关信息之后,我们可以看到我们的主角 realizeClassWithoutSwift 闪亮登场了:

1
2
3
4
5
6
7
8
9
/***********************************************************************
* realizeClassWithoutSwift
* Performs first-time initialization on class cls,
* including allocating its read-write data.
* Does not perform any Swift-side initialization.
* Returns the real class structure for the class.
* Locking: runtimeLock must be write-locked by the caller
**********************************************************************/
static Class realizeClassWithoutSwift(Class cls, Class previously)

从方法的名称以及方法注释我们可以知道, realizeClassWithoutSwift 是进行类的第一次初始化操作,包括分配读写数据也就是我们常说的 rw ,但是并不会进行任何的 Swift 端初始化。我们直接聚焦下面的代码:

1
2
3
4
5
// Normal class. Allocate writeable class data.
rw = objc::zalloc<class_rw_t>();
rw->set_ro(ro);
rw->flags = RW_REALIZED|RW_REALIZING|isMeta;
cls->setData(rw);
  • 通过 zalloc 开辟内存空间,返回一个新的 rw
  • 将 cls 取出来的 ro 赋值给这个 rw
  • 将 rw 设置到 cls 身上

可以清楚地看到,此时 rw 还是为空,说明这里只是对 rw 进行了初始化,但是方法、属性、协议这些都没有被添加上。

我们接着往下走:

1
2
3
4
5
6
7
8
9
// Realize superclass and metaclass, if they aren't already.
// This needs to be done after RW_REALIZED is set above, for root classes.
// This needs to be done after class index is chosen, for root metaclasses.
// This assumes that none of those classes have Swift contents,
// or that Swift's initializers have already been called.
// fixme that assumption will be wrong if we add support
// for ObjC subclasses of Swift classes.
supercls = realizeClassWithoutSwift(remapClass(cls->getSuperclass()), nil);
metacls = realizeClassWithoutSwift(remapClass(cls->ISA()), nil);

这里可以看到父类和元类都会递归调用 realizeClassWithoutSwift 来初始化各自的 rw 。为什么在类的加载操作里面要去加载父类和元类呢?回忆一下类的结构,答案很简单:要保证 superclass 和 isa 的完整性,也就是保证类的完整性。

1
2
3
// Update superclass and metaclass in case of remapping
cls->setSuperclass(supercls);
cls->initClassIsa(metacls);

上面的代码就是最好的证明,初始化完毕的父类和元类被赋值到了类的 superclass 和 isa 上面。

接着往下走可以看到,不光要把父类关联到类上面,还要让父类知道子类的存在。

最后一行代码是 methodizeClass(cls) ,注释显示的是 attach categories ,附加分类到类?我们进入其内部实现一探究竟。

在探索 methodizeClass 前,我们先总结一下 realizeClassWithoutSwift :

  • 读取 class 的 data()
  • ro/rw 赋值
  • 父类和元类实现
    • supercls = realizeClassWithoutSwift(remapClass(cls->getSuperclass()), nil)
    • metacls = realizeClassWithoutSwift(remapClass(cls->ISA()), nil)
  • 父类和元类归属关系
    • cls->setSuperclass(supercls)
    • cls->initClassIsa(metacls)
  • 将当前类链接到其父类的子类列表 addSubclass(supercls, cls)

methodizeClass 分析

realizeClassWithoutSwift方法最后一行调用的是methodizeClass方法

1
2
3
4
5
6
7
/***********************************************************************
* methodizeClass
* Fixes up cls's method list, protocol list, and property list.
* Attaches any outstanding categories.
* Locking: runtimeLock must be held by the caller
**********************************************************************/
static void methodizeClass(Class cls, Class previously)

对类的方法列表、协议列表和属性列表进行修正
附加 category 到类上面来

我们直接往下面走:

1
2
3
4
5
6
// Install methods and properties that the class implements itself.
method_list_t *list = ro->baseMethods();
if (list) {
prepareMethodLists(cls, &list, 1, YES, isBundleClass(cls), nullptr);
if (rwe) rwe->methods.attachLists(&list, 1);
}
  • 从 ro 中取出方法列表,在 rwe 存在时附加到 rwe 上
1
2
3
4
property_list_t *proplist = ro->baseProperties;
if (rwe && proplist) {
rwe->properties.attachLists(&proplist, 1);
}
  • 从 ro 中取出属性列表,在 rwe 存在时附加到 rwe 上
1
2
3
4
protocol_list_t *protolist = ro->baseProtocols;
if (rwe && protolist) {
rwe->protocols.attachLists(&protolist, 1);
}
  • 从 ro 中取出协议列表,在 rwe 存在时附加到 rwe 上
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Attach categories.
if (previously) {
if (isMeta) {
objc::unattachedCategories.attachToClass(cls, previously,
ATTACH_METACLASS);
} else {
// When a class relocates, categories with class methods
// may be registered on the class itself rather than on
// the metaclass. Tell attachToClass to look for those.
objc::unattachedCategories.attachToClass(cls, previously,
ATTACH_CLASS_AND_METACLASS);
}
}
objc::unattachedCategories.attachToClass(cls, cls,
isMeta ? ATTACH_METACLASS : ATTACH_CLASS);
  • 从 cls 中取出未附加的分类进行附加操作

探索 load_images

我们接着探索 _dyld_objc_notify_register 的第二个参数 load_images ,这个函数指针是在什么时候调用的呢,同样的,我们接着在 dyld 源码中搜索对应的函数指针 sNotifyObjCInit :

1
2
3
4
5
6
7
for (std::vector<ImageLoader*>::iterator it=sAllImages.begin(); it != sAllImages.end(); it++) {
ImageLoader* image = *it;
if ( (image->getState() == dyld_image_state_initialized) && image->notifyObjC() ) {
dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
(*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
}
}

可以看到,在 notifySingle 方法内部, sNotifyObjCInit 函数指针被调用了。根据我们上一篇文章探索 dyld 底层可以知道, load_images 应该是对于每一个加载进来的 Mach-O 镜像都会递归调用一次。

我们来到 libObjc 源码中 load_images 的定义处:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/***********************************************************************
* load_images
* Process +load in the given images which are being mapped in by dyld.
*
* Locking: write-locks runtimeLock and loadMethodLock
**********************************************************************/
extern bool hasLoadMethods(const headerType *mhdr);
extern void prepare_load_methods(const headerType *mhdr);
void
load_images(const char *path __unused, const struct mach_header *mh)
{
if (!didInitialAttachCategories && didCallDyldNotifyRegister) {
didInitialAttachCategories = true;
loadAllCategories();
}
// Return without taking locks if there are no +load methods here.
if (!hasLoadMethods((const headerType *)mh)) return;
recursive_mutex_locker_t lock(loadMethodLock)
// Discover load methods
{
mutex_locker_t lock2(runtimeLock);
prepare_load_methods((const headerType *)mh);
}
// Call +load methods (without runtimeLock - re-entrant)
call_load_methods();
}

处理由 dyld 映射的给定镜像中的 +load 方法

  • 判断是否有 load 方法,如果没有,直接返回
  • 搜索 load 方法,具体实现通过 prepare_load_methods
  • 调用 load 方法,具体实现通过 call_load_methods

prepare_load_methods 分析

从这个方法名称,我们猜测这里应该做的是 load 方法的一些预处理工作,让我们来到源码进行分析:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/***********************************************************************
* prepare_load_methods
* Queues +load work for the image described by mhdr: first every class
* in the image's non-lazy class list (via schedule_class_load), then
* every category in its non-lazy category list.
*
* Locking: runtimeLock must be held by the caller (asserted below).
**********************************************************************/
void prepare_load_methods(const headerType *mhdr)
{
// `count` is reused: it is filled in by each of the two list getters.
size_t count, i;
runtimeLock.assertLocked();
// Pass 1: classes. Each entry is remapped before being scheduled.
classref_t const *classlist =
_getObjc2NonlazyClassList(mhdr, &count);
for (i = 0; i < count; i++) {
schedule_class_load(remapClass(classlist[i]));
}
// Pass 2: categories, processed only after all classes were scheduled.
category_t * const *categorylist = _getObjc2NonlazyCategoryList(mhdr, &count);
for (i = 0; i < count; i++) {
category_t *cat = categorylist[i];
Class cls = remapClass(cat->cls);
if (!cls) continue; // category for ignored weak-linked class
if (cls->isSwiftStable()) {
_objc_fatal("Swift class extensions and categories on Swift "
"classes are not allowed to have +load methods");
}
// Realize the category's class before the category is queued;
// the assertion checks that its metaclass is realized as well.
realizeClassWithoutSwift(cls, nil);
ASSERT(cls->ISA()->isRealized());
add_category_to_loadable_list(cat);
}
}
/***********************************************************************
* prepare_load_methods
* Schedule +load for classes in this image, any un-+load-ed
* superclasses in other images, and any categories in this image.
*
* NOTE: this banner describes the prepare_load_methods pass as a whole;
* the function defined directly below is its helper schedule_class_load.
**********************************************************************/
// Recursively schedule +load for cls and any un-+load-ed superclasses.
// cls must already be connected.
// A nil cls ends the recursion (reached when getSuperclass() returns nil).
static void schedule_class_load(Class cls)
{
if (!cls) return;
ASSERT(cls->isRealized()); // _read_images should realize
// RW_LOADED is set at the end of this function, so a class that has
// already been through here is skipped.
if (cls->data()->flags & RW_LOADED) return;
// Ensure superclass-first ordering
schedule_class_load(cls->getSuperclass());
add_class_to_loadable_list(cls);
// Mark the class so later calls return early at the check above.
cls->setInfo(RW_LOADED);
}
/***********************************************************************
* add_class_to_loadable_list
* Class cls has just become connected. Schedule it for +load if
* it implements a +load method.
*
* Appends a {cls, method} entry to the loadable_classes array, growing
* the array when it is full. Does nothing if cls->getLoadMethod()
* returns nil.
*
* Locking: loadMethodLock must be held by the caller (asserted below).
**********************************************************************/
void add_class_to_loadable_list(Class cls)
{
IMP method;
loadMethodLock.assertLocked();
method = cls->getLoadMethod();
if (!method) return; // Don't bother if cls has no +load method

if (PrintLoading) {
_objc_inform("LOAD: class '%s' scheduled for +load",
cls->nameForLogging());
}

// Array is full: grow capacity to (2 * old capacity + 16) entries.
// NOTE(review): the realloc result is not checked for NULL here.
if (loadable_classes_used == loadable_classes_allocated) {
loadable_classes_allocated = loadable_classes_allocated*2 + 16;
loadable_classes = (struct loadable_class *)
realloc(loadable_classes,
loadable_classes_allocated *
sizeof(struct loadable_class));
}
// Append the entry at the first unused slot.
loadable_classes[loadable_classes_used].cls = cls;
loadable_classes[loadable_classes_used].method = method;
loadable_classes_used++;
}
  • 首先通过 _getObjc2NonlazyClassList 获取当前镜像中所有非懒加载类的列表

  • 然后通过schedule_class_load 遍历这些类

    • 递归调用遍历父类的 load 方法,确保父类的 load 方法顺序排在子类的前面
    • 通过 add_class_to_loadable_list , 把类的 load 方法存在 loadable_classes 里面
  • 完成 schedule_class_load 之后,通过 _getObjc2NonlazyCategoryList 取出所有分类数据

  • 然后遍历这些分类

    • 通过 realizeClassWithoutSwift 来防止类没有初始化,如果已经初始化了则不影响
    • 通过 add_category_to_loadable_list ,加载分类中的 load 方法到 loadable_categories 里面

call_load_methods 分析

通过名称我们可以知道 call_load_methods 应该就是 load 方法被调用的地方了。我们直接看源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/***********************************************************************
* call_load_methods
* Call all pending class and category +load methods.
* Class +load methods are called superclass-first.
* Category +load methods are not called until after the parent class's +load.
*
* This method must be RE-ENTRANT, because a +load could trigger
* more image mapping. In addition, the superclass-first ordering
* must be preserved in the face of re-entrant calls. Therefore,
* only the OUTERMOST call of this function will do anything, and
* that call will handle all loadable classes, even those generated
* while it was running.
*
* The sequence below preserves +load ordering in the face of
* image loading during a +load, and make sure that no
* +load method is forgotten because it was added during
* a +load call.
* Sequence:
* 1. Repeatedly call class +loads until there aren't any more
* 2. Call category +loads ONCE.
* 3. Run more +loads if:
* (a) there are more classes to load, OR
* (b) there are some potential category +loads that have
* still never been attempted.
* Category +loads are only run once to ensure "parent class first"
* ordering, even if a category +load triggers a new loadable class
* and a new loadable category attached to that class.
*
* Locking: loadMethodLock must be held by the caller
* All other locks must not be held.
**********************************************************************/
void call_load_methods(void)
{
// True only while the outermost invocation is running; a nested
// invocation sees it set and returns immediately.
static bool loading = NO;
// Result of the latest call_category_loads(); a true value keeps the
// do/while loop going.
bool more_categories;
loadMethodLock.assertLocked();
// Re-entrant calls do nothing; the outermost call will finish the job.
if (loading) return;
loading = YES;
// One autorelease pool is pushed before the loop and popped after it.
void *pool = objc_autoreleasePoolPush();
do {
// 1. Repeatedly call class +loads until there aren't any more
while (loadable_classes_used > 0) {
call_class_loads();
}
// 2. Call category +loads ONCE
more_categories = call_category_loads();
// 3. Run more +loads if there are classes OR more untried categories
} while (loadable_classes_used > 0 || more_categories);
objc_autoreleasePoolPop(pool);
loading = NO;
}

call_load_methods
调用类和类别中所有未决的 +load 方法
类里面 +load 方法是父类优先调用的
而在父类的 +load 之后才会调用分类的 +load 方法

  • 通过 objc_autoreleasePoolPush 压栈一个自动释放池
  • do-while循环开始
    • 循环调用类的 +load 方法直到找不到为止
    • 调用一次分类中的 +load 方法
  • 通过 objc_autoreleasePoolPop 出栈一个自动释放池

总结

至此, _objc_init 和 _dyld_objc_notify_register 我们就分析完了,我们对类的加载有了更细致的认知。 iOS 底层有时候探索起来确实很枯燥,但是如果能找到高效的方法以及明确自己所探索的方向,会让自己从宏观上重新审视这门技术。是的,技术只是工具,我们不能被技术所绑架,我们要做到有的放矢地去探索,这样才能事半功倍。