0%

iOS底层:cache_t

前文我们探索了iOS类的底层原理,简单了解了四个重要的属性。这篇文章主要探索第三个属性cache_t,对于这个属性,我们可以学习到苹果对于缓存的设计和理解。

探索cache_t

从数据结构开始

cache_t的基本结构

我们看一下源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
struct cache_t {
//省略代码
public:
// The following four fields are public for objcdt's use only.
// objcdt reaches into fields while the process is suspended
// hence doesn't care for locks and pesky little details like this
// and can safely use these.
unsigned capacity() const;
struct bucket_t *buckets() const;//重点
Class cls() const;

#if CONFIG_USE_PREOPT_CACHES
const preopt_cache_t *preopt_cache() const;
#endif

mask_t occupied() const;
void initializeToEmpty();
//省略代码
}

发现有一个 buckets() 方法,它返回一个指向结构体 bucket_t 的指针:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
// Compute the ptrauth signing modifier from &_imp, newSel, and cls.
uintptr_t modifierForSEL(bucket_t *base, SEL newSel, Class cls) const {
return (uintptr_t)base ^ (uintptr_t)newSel ^ (uintptr_t)cls;
}
// Sign newImp, with &_imp, newSel, and cls as modifiers.
uintptr_t encodeImp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, IMP newImp, UNUSED_WITHOUT_PTRAUTH SEL newSel, Class cls) const {
if (!newImp) return 0;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
return (uintptr_t)
ptrauth_auth_and_resign(newImp,
ptrauth_key_function_pointer, 0,
ptrauth_key_process_dependent_code,
modifierForSEL(base, newSel, cls));
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
return (uintptr_t)newImp ^ (uintptr_t)cls;
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (uintptr_t)newImp;
#else
#error Unknown method cache IMP encoding.
#endif
}

public:
static inline size_t offsetOfSel() { return offsetof(bucket_t, _sel); }
inline SEL sel() const { return _sel.load(memory_order_relaxed); }

#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
#define MAYBE_UNUSED_ISA
#else
#define MAYBE_UNUSED_ISA __attribute__((unused))
#endif
inline IMP rawImp(MAYBE_UNUSED_ISA objc_class *cls) const {
uintptr_t imp = _imp.load(memory_order_relaxed);
if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
imp ^= (uintptr_t)cls;
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
#else
#error Unknown method cache IMP encoding.
#endif
return (IMP)imp;
}

inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
uintptr_t imp = _imp.load(memory_order_relaxed);
if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
SEL sel = _sel.load(memory_order_relaxed);
return (IMP)
ptrauth_auth_and_resign((const void *)imp,
ptrauth_key_process_dependent_code,
modifierForSEL(base, sel, cls),
ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
}

template <Atomicity, IMPEncoding>
void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);
};

从源码定义中不难看出,bucket_t 其实缓存的是方法实现 IMP。这里有一个注意点,就是 IMP-first 和 SEL-first 的区别:

IMP-first is better for arm64e ptrauth and no worse for arm64.

  • IMP-first 对 arm64e 的效果更好,对 arm64 不会有坏的影响。

SEL-first is better for armv7* and i386 and x86_64.

  • SEL-first 适用于 armv7*、i386 和 x86_64。

通过上面的源码,我们大致了解了 bucket_t 类型的结构,那么现在问题来了,类中的 cache 是在什么时候以什么样的方式来进行缓存的呢?我们使用LLDB查看一下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
(lldb) x pClass
0x100008220: f8 81 00 00 01 00 00 00 40 71 35 00 01 00 00 00 ........@q5.....
0x100008230: 10 d4 22 01 01 00 00 00 03 00 00 00 24 80 01 00 ..".........$...
(lldb) p (cache_t *)0x100008230 //0x100008220 + 16字节(isa 8字节 + superclass 8字节)
(cache_t *) $2 = 0x0000000100008230
(lldb) p *$2
(cache_t) $3 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4314027024
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 3
}
}
_flags = 32804
_occupied = 1
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0001802400000003
}
}
}
}
(lldb) p $3.buckets()
(bucket_t *) $4 = 0x000000010122d410
(lldb) p *$4
(bucket_t) $5 = {
_sel = {
std::__1::atomic<objc_selector *> = "" {
Value = ""
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 3365936
}
}
}

_occupied 应该是表示当前已经占用了多少缓存(每调用一个实例方法会+1)。下面验证一下

1
2
3
4
5
6
7
8
9
10
int main(int argc, const char * argv[]) {
@autoreleasepool {
Person *p = [[Person alloc] init];
Class pClass = object_getClass(p);
[p sayHello];
[p sayOk];//断点打在这

}
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
(lldb) x pClass
0x100008240: 18 82 00 00 01 00 00 00 40 71 35 00 01 00 00 00 ........@q5.....
0x100008250: 00 dc 74 00 01 00 00 00 03 00 00 00 24 80 02 00 ..t.........$...
(lldb) p (cache_t *)0x100008250
(cache_t *) $1 = 0x0000000100008250
(lldb) p *$1
(cache_t) $2 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4302625792
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 3//注意
}
}
_flags = 32804
_occupied = 2
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0002802400000003
}
}
}
}

可以看到_occupied = 2验证了我们的猜想,我们继续走断点发现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
int main(int argc, const char * argv[]) {
@autoreleasepool {
Person *p = [[Person alloc] init];
Class pClass = object_getClass(p);
[p sayHello];
[p sayOk];
//断点在最后一步
}
return 0;
}

(lldb) p *$1
(cache_t) $3 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4305857472
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 7//注意
}
}
_flags = 32804
_occupied = 1
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0001802400000007
}
}
}
}

我们发现 _occupied = 1,而 _maybeMask 的 Value = 7(上一步为 3),这是为什么呢?

如果读者了解并掌握散列表这种数据结构的话,相信已经看出端倪了。是的,这里其实就是用到了 开放寻址法 来解决散列冲突(哈希冲突)。

关于哈希冲突,可以借助鸽笼理论,即把 11 只鸽子放进 10 个抽屉里面,肯定会有一个抽屉里面有 2 只鸽子。是不是理解起来很简单?

通过上面的测试,我们明确了方法缓存使用的是哈希表存储,并且为了解决无法避免的哈希冲突使用的是开放寻址法,而开放寻址法必然要在合适的时机进行扩容,这个时机肯定不会是在数据已经装满的时候,我们可以进源码探索一下,我们快速定位到 cache_t 的源码处:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
runtimeLock.assertLocked();
///省略代码
ASSERT(sel != 0 && cls()->isInitialized());

// Use the cache as-is if until we exceed our expected fill ratio.
mask_t newOccupied = occupied() + 1;
///取当前占用的空间大小
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
// Cache is less than 3/4 or 7/8 full. Use it as-is.
}
#if CACHE_ALLOW_FULL_UTILIZATION
else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
// Allow 100% cache utilization for small buckets. Use it as-is.
}
#endif
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
///分配空间
reallocate(oldCapacity, capacity, true);
}
///省略代码
}
1
2
3
4
5
6
7
8
9
10
11
12
13
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
// When we have a cache end marker it fills a bucket slot, so having a
// initial cache size of 2 buckets would not be efficient when one of the
// slots is always filled with the end marker. So start with a cache size
// 4 buckets.
INIT_CACHE_SIZE_LOG2 = 2,
#else
// Allow an initial bucket size of 2 buckets, since a large number of
// classes, especially metaclasses, have very few imps, and we support
// the ability to fill 100% of the cache before resizing.
INIT_CACHE_SIZE_LOG2 = 1,
#endif
INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2),

从上面代码可以看出,缓存的初始大小为 4(对于 metaclasses 这类只有很少 imp 的情况为 2),最后调用 reallocate 函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();
bucket_t *newBuckets = allocateBuckets(newCapacity);

// Cache's old contents are not propagated.
// This is thought to save cache memory at the cost of extra cache fills.
// fixme re-measure this

ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

setBucketsAndMask(newBuckets, newCapacity - 1);

if (freeOld) {
collect_free(oldBuckets, oldCapacity);
}
}
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED

void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
// objc_msgSend uses mask and buckets with no locks.
// It is safe for objc_msgSend to see new buckets but old mask.
// (It will get a cache miss but not overrun the buckets' bounds).
// It is unsafe for objc_msgSend to see old buckets and new mask.
// Therefore we write new buckets, wait a lot, then write new mask.
// objc_msgSend reads mask first, then buckets.

#ifdef __arm__
// ensure other threads see buckets contents before buckets pointer
mega_barrier();

_bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_relaxed);

// ensure other threads see new buckets before new mask
mega_barrier();

_maybeMask.store(newMask, memory_order_relaxed);
_occupied = 0;
#elif __x86_64__ || i386
// ensure other threads see buckets contents before buckets pointer
_bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release);

// ensure other threads see new buckets before new mask
_maybeMask.store(newMask, memory_order_release);
_occupied = 0;
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}

可以看到 setBucketsAndMask 存入 _bucketsAndMaybeMask 的是 newBuckets,而 mask 为 newCapacity - 1,我们看 capacity() 方法也能验证这一点:

1
2
3
4
unsigned cache_t::capacity() const
{
return mask() ? mask()+1 : 0;
}

继续探索cache_t

通过前面的探索,我们知道了 cache_t 实质上是缓存了我们类的实例方法,那么对于类方法来说,自然就是缓存在了元类上了。这一点我相信读者应该都能理解。

方法缓存策略

直观的感受就是会在insert的时候缓存,我们继续看insert方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
runtimeLock.assertLocked();//加锁
// Never cache before +initialize is done
if (slowpath(!cls()->isInitialized())) {//initialize调用之后才会缓存
return;
}
if (isConstantOptimizedCache()) {//内联函数 return false 所以if里不会执行
_objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
cls()->nameForLogging());
}
#if DEBUG_TASK_THREADS
return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
mutex_locker_t lock(cacheUpdateLock);
#endif

ASSERT(sel != 0 && cls()->isInitialized());

// Use the cache as-is if until we exceed our expected fill ratio.
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
// Cache is less than 3/4 or 7/8 full. Use it as-is.
}
#if CACHE_ALLOW_FULL_UTILIZATION
else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
// Allow 100% cache utilization for small buckets. Use it as-is.
}
#endif
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true);
}

bucket_t *b = buckets();
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;

// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
if (fastpath(b[i].sel() == 0)) {
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));

bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

方法缓存是否有序

方法缓存是无序的,这是因为计算缓存下标是一个哈希算法:

1
2
3
4
static inline mask_t cache_hash(cache_key_t key, mask_t mask) 
{
return (mask_t)(key & mask);
}

通过 cache_hash 之后计算出来的下标并不是有序的,下标值取决于 key 和 mask 的值。

bucket 与 mask, capacity, sel, imp 的关系

一个类有一个属性 cache_t,而一个 cache_t 的 buckets 会有多个 bucket。一个 bucket 存储的是 imp 和 cache_key_t。

mask 的值对于 bucket 来说,主要是在缓存查找时作为哈希算法的掩码使用。
capacity 则可以获取到 cache_t 中 bucket 的数量。

sel 在缓存的时候是被强转成了 cache_key_t 的形式,更方便查询使用。
imp 则是函数指针,也就是方法的具体实现,缓存的主要目的就是通过一系列策略让运行时更快地执行消息发送的逻辑。

总结

  • OC 中实例方法缓存在类上面,类方法缓存在元类上面。
  • cache_t 缓存会提前进行扩容防止溢出。
  • 方法缓存是为了最大化的提高程序的执行效率。
  • 苹果在方法缓存这里用的是开放寻址法来解决哈希冲突。
  • 通过 cache_t 我们可以进一步延伸去探究 objc_msgSend,因为查找方法缓存是属于 objc_msgSend 查找方法实现的快速流程。