Android8 native层Thread源码分析学习笔记(通俗易懂版)

原创已于 2025-08-21 17:39:49 修改 · 1.1k 阅读

20 ·

CC 4.0 BY-SA版权

文章标签：

#学习 #笔记 #安卓framework开发 #安卓源码分析 #java #c++

于 2025-08-21 17:39:12 首次发布

源码位置：

/system/core/libutils/Threads.cpp
/system/core/libutils/include/utils/Thread.h

Threads.cpp源码分析

首先来看代码的入口run方法。它主要做三件事：加锁、持有对自身的强引用，然后根据成员变量mCanCallJava的值调用创建线程的方法createThreadEtc或者androidCreateRawThreadEtc。具体请看下述代码注释：

// Starts the thread. Under mLock it resets the per-run state, pins a strong
// reference to this object in mHoldSelf, and creates the underlying thread with
// _threadLoop as its entry point and `this` as its argument.
//
// name:     thread name; must not be null.
// priority: forwarded unchanged to the thread-creation function.
// stack:    forwarded unchanged to the thread-creation function.
//
// Returns INVALID_OPERATION if the thread is already running, UNKNOWN_ERROR if
// the thread could not be created, and NO_ERROR once the thread was created.
status_t Thread::run(const char* name, int32_t priority, size_t stack)
{
    // A thread name is mandatory; a null name is treated as a fatal error.
    LOG_ALWAYS_FATAL_IF(name == nullptr, "thread name not provided to Thread::run");
    // Hold mLock until this function returns.
    Mutex::Autolock _l(mLock);
    // Refuse to start a second time while the thread is still running.
    if (mRunning) {
        // thread already started
        return INVALID_OPERATION;
    }
    
    // reset status and exitPending to their default value, so we can
    // try again after an error happened (either below, or in readyToRun())
    mStatus = NO_ERROR;       // forget the result of any previous run
    mExitPending = false;     // forget any previous exit request
    mThread = thread_id_t(-1); // no valid thread id yet

    // hold a strong reference on ourself
    // `this` is a raw pointer; storing it in mHoldSelf keeps the object alive
    // even if the caller drops its own reference right after run() returns.
    // _threadLoop() copies mHoldSelf into a local sp<Thread> and then clears
    // it, so the new thread owns the reference from that point on.
    mHoldSelf = this;

    mRunning = true;

    bool res;
    // mCanCallJava selects which creation function is used.
    if (mCanCallJava) {
        // NOTE(review): createThreadEtc is not shown in this file; presumably it
        // is the variant that prepares the thread for calling into Java -
        // confirm against its definition.
        res = createThreadEtc(_threadLoop, // entry point of the new thread
                this,                      // argument handed to the entry point
                name, priority, stack,     // name, priority, stack size
                &mThread);                 // receives the id of the new thread
    } else {
        // androidCreateRawThreadEtc creates the thread directly with
        // pthread_create.
        res = androidCreateRawThreadEtc(_threadLoop,
                this, name, priority, stack, &mThread);
    }
    // Thread creation failed: undo everything done above.
    if (res == false) {
        
        mStatus = UNKNOWN_ERROR;   // record the failure
        mRunning = false;          // allow run() to be called again
        mThread = thread_id_t(-1); // no thread id
        // No new thread exists to take over the self-reference, so drop it
        // here. If it was the last strong reference the object is destroyed by
        // this call, which is why no member is touched afterwards.
        mHoldSelf.clear();  // "this" may have gone away after this.

        return UNKNOWN_ERROR;
    }

    // Do not refer to mStatus here: The thread is already running (may, in fact
    // already have exited with a valid mStatus result). The NO_ERROR indication
    // here merely indicates successfully starting the thread and does not
    // imply successful termination/execution.
    return NO_ERROR;

    // Exiting scope of mLock is a memory barrier and allows new thread to run
}

然后再来分析androidCreateRawThreadEtc方法。它首先会判断是否指定了线程名或者非默认的优先级，如果是的话，就在堆上创建并填充一个thread_data_t结构体。

那么先来分析一下线程数据结构体thread_data_t。它包含一个静态方法trampoline，作用是充当跳板函数。来看看这个函数：首先取出结构体中的数据，然后释放该结构体的内存，再设置线程的优先级，并根据优先级选择调度策略（前台策略或后台策略），接着设置线程名并释放线程名字符串，最后在return时调用之前传递过来的入口函数，即_threadLoop静态方法。

分析完结构体之后，再回到androidCreateRawThreadEtc方法。创建完线程数据结构体以后，会执行最重要的posix线程创建方法pthread_create（注意此处将结构体作为参数传递进去了）。

// thread_data_t结构体（容器）
struct thread_data_t {
    thread_func_t   entryFunction; // 原始线程的入口函数
    void*           userData;
    int             priority;
    char *          threadName;

    // we use this trampoline when we need to set the priority with
    // nice/setpriority, and name with prctl.
    // 跳板函数，在实际线程函数执行前设置优先级和名称
    static int trampoline(const thread_data_t* t) {
        thread_func_t f = t->entryFunction;  // 保存原始函数指针
        void* u = t->userData;               // 保存用户数据
        int prio = t->priority;              // 保存优先级
        char * name = t->threadName;         // 保存线程名
        
        delete t;  // 立即释放线程数据内存（不再需要）
        // 设置线程优先级
        setpriority(PRIO_PROCESS, 0, prio);
        if (prio >= ANDROID_PRIORITY_BACKGROUND) {
            // 后台线程策略
            set_sched_policy(0, SP_BACKGROUND);
        } else {
            // 前台线程策略
            set_sched_policy(0, SP_FOREGROUND);
        }

        if (name) {
            androidSetThreadName(name);
            free(name);
        }
        // 调用原始线程函数并返回结果
        return f(u);
    }
};



----------------------------------------------------------------------------------

// Creates a detached POSIX thread that runs entryFunction(userData).
//
// entryFunction:   function the new thread executes.
// userData:        argument passed to entryFunction.
// threadName:      optional name for the thread (only applied on Android).
// threadPriority:  priority for the thread (only applied on Android).
// threadStackSize: stack size in bytes; 0 keeps the default.
// threadId:        if non-NULL, receives the id of the created thread.
//
// Returns 1 on success and 0 if pthread_create failed.
int androidCreateRawThreadEtc(android_thread_func_t entryFunction,
                               void *userData,
                               const char* threadName __android_unused,
                               int32_t threadPriority,
                               size_t threadStackSize,
                               android_thread_id_t *threadId)
{
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    // The thread is created detached, so nobody has to pthread_join() it.
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

#if defined(__ANDROID__)  /* valgrind is rejecting RT-priority create reqs */
    // Set only when the trampoline is used, so it can be released if thread
    // creation fails.
    thread_data_t* trampolineData = NULL;
    if (threadPriority != PRIORITY_DEFAULT || threadName != NULL) {
        // Priority and name have to be applied from inside the new thread, so
        // wrap the real entry point and argument and start at the trampoline.
        trampolineData = new thread_data_t;
        trampolineData->priority = threadPriority;
        trampolineData->threadName = threadName ? strdup(threadName) : NULL;
        trampolineData->entryFunction = entryFunction;
        trampolineData->userData = userData;

        entryFunction = (android_thread_func_t)&thread_data_t::trampoline;
        userData = trampolineData;
    }
#endif

    // Only override the stack size when one was requested.
    if (threadStackSize) {
        pthread_attr_setstacksize(&attr, threadStackSize);
    }

    errno = 0;
    pthread_t thread;
    int result = pthread_create(&thread, &attr,
                    (android_pthread_entry)entryFunction, userData);
    pthread_attr_destroy(&attr);

    if (result != 0) {
        // pthread_create reports failure through its return value, not errno,
        // so the message is derived from `result`.
        ALOGE("androidCreateRawThreadEtc failed (entry=%p, res=%d, %s)\n"
             "(android threadPriority=%d)",
            entryFunction, result, strerror(result), threadPriority);
#if defined(__ANDROID__)
        // No thread was started, so the trampoline will never run and free
        // the bundle; release it here to avoid leaking it and the name copy.
        if (trampolineData != NULL) {
            free(trampolineData->threadName);
            delete trampolineData;
        }
#endif
        return 0;
    }

    // Note that *threadId is directly available to the parent only, as it is
    // assigned after the child starts.  Use memory barrier / lock if the child
    // or other threads also need access.
    if (threadId != NULL) {
        *threadId = (android_thread_id_t)thread; 
    }
    return 1;
}

这里可能有点难以理解，创建线程之后是怎么执行到_threadLoop方法的？

让我们用两个角色的视角来看待这段代码：

角色一：主线程（调用者）

准备阶段：主线程执行 androidCreateRawThreadEtc 函数。它初始化属性 attr，如果需要设置优先级或线程名，它就在堆（Heap）上创建一个 thread_data_t 结构体 t，并将原始的函数 (entryFunction) 和数据 (userData) 打包进去。
发起创建：主线程调用 pthread_create。这个函数告诉操作系统：“请创建一个新的线程，这个新线程应该以 entryFunction 为起点开始运行，并把 userData 这个参数传给它”。
- 此时，entryFunction 已经被替换成了 trampoline 的地址。
- 而 userData 已经被替换成了指向我们刚创建的 thread_data_t 结构体 t 的指针。
继续执行：pthread_create 调用会立即返回。此时，新线程可能还没有开始运行，它只是被创建了，处于“就绪”状态，等待操作系统分配CPU时间片。主线程继续执行后面的代码（检查返回值、设置 threadId 等）。

角色二：新线程

总结：

诞生：在未来的某个不确定的时间点，操作系统调度器决定让这个新线程运行。
起点：新线程不是从 main 函数开始，它的起点就是 pthread_create 指定的那个函数——也就是 thread_data_t::trampoline。
执行跳板：新线程开始执行 trampoline 函数，并且接收到的参数就是主线程当初传递的 userData（即指向 thread_data_t 结构体 t 的指针）。
解包与设置：trampoline 函数从这个结构体 t 中解包出原始的函数指针 (f)、原始的用户数据 (u)、优先级 (prio) 和线程名 (name)。
清理与配置：它立即删除这个结构体 t（因为信息已经取出，不再需要），然后为当前线程（也就是这个新线程自己）设置优先级和名称。
执行真实任务：最后，它调用 return f(u);。这里才是执行用户最初想要在新线程中运行的那个函数（_threadLoop）的时刻。
pthread_create 是“注册”或“吩咐”：它是一条指令，告诉系统“去创建一个这样的线程”，这条指令本身执行得很快。

trampoline 是线程的“入口仪式”：它是新线程被CPU真正执行时做的第一件事，负责为自己“梳妆打扮”（设置属性），然后才开始干正事。
时间差：调用 pthread_create 和 trampoline 的实际执行之间存在一个不确定的、可能非常短暂的时间差。这就是并发编程的特点。

再通俗一点，pthread_create创建了一个子线程，但这个线程不一定立刻在CPU上执行；等它真正开始执行时，会先在trampoline中完成属性设置，然后再调用_threadLoop静态方法。

那么再来看看多次提到的_threadLoop方法。首先将参数转型为Thread类型，并由mHoldSelf创建一个强引用（sp）和一个弱引用（wp），随后清空mHoldSelf。然后进入一个循环：第一次循环时，先调用readyToRun方法对线程进行初始化，如果初始化成功且此时没有退出请求，就在同一次循环中接着调用用户定义的threadLoop方法；之后的每次循环都直接调用threadLoop方法。每次循环结束前都会加锁并检查退出条件是否满足。

// Entry point of the thread created by run(). `user` is the Thread object that
// run() passed as the thread argument. Calls readyToRun() once and then
// threadLoop() repeatedly until threadLoop() returns false, an exit is pending,
// or no strong reference to the object can be re-acquired. Always returns 0.
int Thread::_threadLoop(void* user)
{
    // Recover the Thread object from the untyped thread argument.
    Thread* const self = static_cast<Thread*>(user);
    // Take over the strong reference that run() parked in mHoldSelf ...
    sp<Thread> strong(self->mHoldSelf);
    // ... keep a weak reference so the strong one can be re-acquired later ...
    wp<Thread> weak(strong);
    // ... and release the parked reference; from here on `strong` is the
    // reference this thread holds on the object.
    self->mHoldSelf.clear();

#if defined(__ANDROID__)
    // this is very useful for debugging with gdb
    self->mTid = gettid();
#endif
    // True only for the first pass through the loop.
    bool first = true;

    do {
        bool result;
        if (first) {
            first = false;
            // One-time initialization; its result becomes the thread status.
            self->mStatus = self->readyToRun();
            // Anything other than NO_ERROR ends the thread below.
            result = (self->mStatus == NO_ERROR);
            
            if (result && !self->exitPending()) {
                // Binder threads (and maybe others) rely on threadLoop
                // running at least once after a successful ::readyToRun()
                // (unless, of course, the thread has already been asked to exit
                // at that point).
                // This is because threads are essentially used like this:
                //   (new ThreadSubclass())->run();
                // The caller therefore does not retain a strong reference to
                // the thread and the thread would simply disappear after the
                // successful ::readyToRun() call instead of entering the
                // threadLoop at least once.
                result = self->threadLoop();
            }
        } else {
            // Every later pass just runs the subclass's threadLoop().
            result = self->threadLoop();
        }

        // establish a scope for mLock
        {
        // Locked for the duration of this scope.
        Mutex::Autolock _l(self->mLock);
        // Stop when threadLoop()/readyToRun() asked for it or an exit is pending.
        if (result == false || self->mExitPending) {
            
            self->mExitPending = true;  // mark the exit as pending
            self->mRunning = false;     // the thread no longer counts as running
            // clear thread ID so that requestExitAndWait() does not exit if
            // called by a new thread using the same thread ID as this one.
            self->mThread = thread_id_t(-1);
            // note that interested observers blocked in requestExitAndWait are
            // awoken by broadcast, but blocked on mLock until break exits scope
            self->mThreadExitedCondition.broadcast();
            break;
        }
        }

        // Release our strong reference, to let a chance to the thread
        // to die a peaceful death.
        // If nothing else holds a strong reference, the promote() below yields
        // a null pointer and the loop condition ends the thread.
        strong.clear();
        // And immediately, re-acquire a strong reference for the next loop
        strong = weak.promote();
    } while(strong != 0);

    return 0;
}

在这个函数中有一个非常巧妙的设计，就是关于Thread生命周期的管理。

sp<Thread> strong(self->mHoldSelf); 在循环外创建一个强引用。

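在循环内部，每次循环结束时都会先释放strong持有的强引用，再立刻通过weak.promote()尝试重新获取强引用。如果获取失败（promote返回空），说明外部已经没有任何地方持有该Thread对象的强引用，于是退出循环、线程结束。
这样做的好处是：线程自己不会一直“霸占”着Thread对象。只要外部不再持有强引用，线程就会在当前这次threadLoop执行完之后自行结束，而不是因为循环内部始终持有强引用而永远无法退出。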

那我们再来看一下readyToRun方法，它是一个虚函数，子类可以按需重写，用来做一次性的初始化。

 // Good place to do one-time initializations.
 // _threadLoop() calls this once on the new thread before the first
 // threadLoop() and stores the result in mStatus; if the result is not
 // NO_ERROR the thread exits without calling threadLoop().
    virtual status_t    readyToRun();

再来分析一下threadLoop方法，这个方法是继承的子类必须实现的方法。同样由用户进行定义。

private:
    // Derived class must implement threadLoop(). The thread starts its life
    // here. There are two ways of using the Thread object:
    // 1) loop: if threadLoop() returns true, it will be called again if
    //          requestExit() wasn't called.
    // 2) once: if threadLoop() returns false, the thread will exit upon return.
    // _threadLoop() makes the first call only after readyToRun() returned
    // NO_ERROR and no exit was already pending.
    virtual bool        threadLoop() = 0;

至此，已经可以理解Thread调用run之后是怎么执行到定义的readyToRun和threadLoop方法的。后续将会学习并讲解pthread_create是怎么创建线程的。