NPTL:lll_lock() 和 lll_unlock() 的实现

本文深入探讨了NPTL库中lll_lock()和lll_unlock()函数的实现细节,揭示了它们如何利用sys_futex()系统调用和GCC内置的原子性操作函数进行线程同步。通过分析,我们了解到这些函数如何通过FUTEX_WAIT和FUTEX_WAKE操作来挂起和恢复线程,以及如何使用__sync_lock_test_and_set()和__sync_synchronize()等函数确保锁的原子性访问。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

NPTL 库中 lll_lock() 和 lll_unlock() 的实现用到了:

  • sys_futex() 使用了 FUTEX_WAIT 操作,使得调用者线程被挂起;使用 FUTEX_WAKE 操作来恢复之前被挂起的线程

  • 使用了 GCC 内置的原子性操作函数,__sync_lock_test_and_set()、__sync_synchronize()

  • CAS 的实现借助了 GCC 内置的原子性操作函数 __sync_bool_compare_and_swap()

总结

  • 使用系统调用 sys_futex() 来完成调用者线程的挂起和恢复

  • 使用 GCC 内置的原子性操作函数,来完成对锁 futex 的同步访问(读/写)

关于 GCC 内置的原子性操作函数,参考 https://gcc.gnu.org/onlinedocs/gcc-4.1.2/gcc/Atomic-Builtins.html#Atomic-Builtins

测试程序:下面是一个可以直接编译并运行的测试程序,测试平台为 x86_64(64 位)Linux。代码从 NPTL 源码中整理而来,并针对 x86_64 做了裁剪和修改。由于使用了 pthread,编译时需要加上 -pthread 选项(例如 gcc -pthread test.c)。

/* For test  */
/* 测试平台 x86_64 64位 */
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdint.h>
#include <x86_64-linux-gnu/asm/unistd_64.h>
#include <sys/prctl.h>
/* For test end */

/* Values accepted for the `private` argument of the lll_* macros.  */
#define LLL_PRIVATE     0
#define LLL_SHARED      0x80

/* Operation codes passed as the second argument of the futex system call.  */
#define FUTEX_WAIT      0
#define FUTEX_WAKE      1

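/*
 * Fallback definitions, kept for reference only.  They are left commented
 * out, presumably because the C library headers already provide
 * __glibc_unlikely() and __glibc_likely().
 *
#define __glibc_unlikely(cond)    __builtin_expect((cond), 0)
#define __glibc_likely(cond)    __builtin_expect((cond), 1)
#define __builtin_expect(expr, val)    (expr)
*/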


/* Atomically store VALUE into *MEM and evaluate to the value *MEM held
   before.  Built from two GCC builtins: __sync_synchronize () is issued
   first, then __sync_lock_test_and_set () performs the exchange.  */
#define atomic_exchange_rel(mem, value)                 \
  ({                                                    \
    __sync_synchronize ();                              \
    __sync_lock_test_and_set ((mem), (value));          \
  })


/* Release the lock word that FUTEX points to.  The word is atomically
   replaced by 0 (not acquired).  If the value it held before was greater
   than 1 (acquired, possibly with waiters), one waiter is woken through
   lll_futex_wake; a waiter that subsequently takes the lock stores a value
   greater than 1 again.  The whole macro is an expression of type void.  */
#define __lll_unlock(futex, private)                                    \
  ((void)                                                               \
   ({                                                                   \
     int *__lll_word = (futex);                                         \
     if (__glibc_unlikely (atomic_exchange_rel (__lll_word, 0) > 1))    \
       lll_futex_wake (__lll_word, 1, private);                         \
   }))

/* Unlock the lock variable FUTEX.  FUTEX is the int object itself (an
   lvalue); its address is taken here and handed to __lll_unlock.  */
#define lll_unlock(futex, private) __lll_unlock (&(futex), private)


/* Compare-and-swap: if *MEM equals OLDVAL, atomically replace it with NEWVAL.
   Evaluates to 0 when the swap happened and to 1 when it did not (note the
   inverted sense relative to __sync_bool_compare_and_swap).  */
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval)    \
    (__sync_bool_compare_and_swap ((mem), (oldval), (newval)) == 0)


/* Lock the lock variable FUTEX.  FUTEX is the int object itself (an
   lvalue); its address is taken here and handed to __lll_lock.  */
#define lll_lock(futex, private) __lll_lock (&(futex), private)


/* Acquire the lock word that FUTEX points to.

   Fast path: if the word is 0 (not acquired), a compare-and-swap sets it to
   1 (acquired, no waiters) and the macro is done.

   Slow path: if the compare-and-swap fails, one of the wait functions is
   called; it makes sure the word is >1 (acquired, possibly with waiters) and
   blocks until the lock is obtained, at which point the word is still >1.
   __lll_lock_wait_private is chosen when PRIVATE is known at compile time to
   be LLL_PRIVATE, __lll_lock_wait otherwise.

   The lock is always held when the macro finishes.  This is an expression
   rather than a statement even though its value is void, so that it can be
   used in a comma expression or as an expression that is cast to void.

   The definition ends on the closing parenthesis: there is deliberately no
   line continuation after it, so the macro cannot swallow whatever line
   happens to follow.  */
#define __lll_lock(futex, private)                                         \
  ((void)                                                                  \
   ({                                                                      \
     int *__futex = (futex);                                               \
     if (__glibc_unlikely                                                  \
         (atomic_compare_and_exchange_bool_acq (__futex, 1, 0)))           \
       {                                                                   \
         if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)   \
           __lll_lock_wait_private (__futex);                              \
         else                                                              \
           __lll_lock_wait (__futex, private);                             \
       }                                                                   \
   }))

/* 1 if 'type' is a pointer type, 0 otherwise.  Works by classifying the
   expression `(type) 0` with __builtin_classify_type and comparing the
   result against 5 (presumably GCC's class code for pointers -- confirm
   against the GCC documentation).  */
# define __pointer_type(type) (__builtin_classify_type ((type) 0) == 5)
/* __intptr_t if P is true, or T if P is false.
   The selection relies on the typing rules of the conditional operator:
   `(void *) (P)` is a null pointer constant only when P is 0, in which case
   `0 ? (T *) 0 : (void *) (P)` has type `T *`; otherwise it has type
   `void *`.  The second inner conditional does the same for `__intptr_t *`
   with the condition negated.  The outer conditional then combines the two,
   and the final dereference inside __typeof__ yields the pointed-to type.
   Nothing here is evaluated at run time; only the types are used.  */
# define __integer_if_pointer_type_sub(T, P) \
  __typeof__ (*(0 ? (__typeof__ (0 ? (T *) 0 : (void *) (P))) 0 \
          : (__typeof__ (0 ? (__intptr_t *) 0 : (void *) (!(P)))) 0))

/* __intptr_t if EXPR has a pointer type, or the type of EXPR otherwise.  */
# define __integer_if_pointer_type(expr) \
  __integer_if_pointer_type_sub(__typeof__ ((__typeof__ (expr)) 0), \
                __pointer_type (__typeof__ (expr)))

/* Cast an integer or a pointer VAL to integer with proper type.
   VAL appears twice in the expansion, but the first occurrence is only an
   operand of __typeof__.  */
#define cast_to_integer(val) ((__integer_if_pointer_type (val)) (val))


/* x86_64 */
/* Atomic exchange implemented with the xchg instruction: store NEWVALUE into
   *MEM and evaluate to the value *MEM held before.
   The instruction variant (xchgb / xchgw / xchgl / xchgq) is selected by
   sizeof (*mem): 1, 2, 4, or anything else.  In the last case NEWVALUE is
   first converted with cast_to_integer and then to int64_t, so that a
   pointer argument can be used as the operand.
   `result` is tied to the NEWVALUE input through the "0" matching
   constraint, and *MEM is listed both as an output and as an input.  */
#define atomic_exchange_acq(mem, newvalue) \
  ({ __typeof (*mem) result;                              \
     if (sizeof (*mem) == 1)                              \
       __asm __volatile ("xchgb %b0, %1"                  \
             : "=q" (result), "=m" (*mem)                  \
             : "0" (newvalue), "m" (*mem));                  \
     else if (sizeof (*mem) == 2)                          \
       __asm __volatile ("xchgw %w0, %1"                  \
             : "=r" (result), "=m" (*mem)                  \
             : "0" (newvalue), "m" (*mem));                  \
     else if (sizeof (*mem) == 4)                          \
       __asm __volatile ("xchgl %0, %1"                      \
             : "=r" (result), "=m" (*mem)                  \
             : "0" (newvalue), "m" (*mem));                  \
     else                                                    \
       __asm __volatile ("xchgq %q0, %1"                  \
             : "=r" (result), "=m" (*mem)                  \
             : "0" ((int64_t) cast_to_integer (newvalue)),     \
               "m" (*mem));                                  \
     result; })


/* x86_64 */
/* Argument marshalling helpers used by INTERNAL_SYSCALL_NCS, for system
   calls taking N = 0..6 arguments:

     LOAD_ARGS_N (a1, ..., aN)  evaluates every argument into a local
                                variable __argK of type long int.  The
                                highest-numbered argument is evaluated first.
     LOAD_REGS_N                declares register variables _aK bound to
                                rdi, rsi, rdx, r10, r8 and r9 (K = 1..6) and
                                initialises them from __argK.
     ASM_ARGS_N                 expands to the list of `, "r" (_aK)` input
                                operands for the asm statement.

   The *_TYPES_N variants take explicit (type, argument) pairs; each level
   handles its own argument and then delegates to level N-1.  The LOAD_REGS
   variants only use the type parameters: the aK parameters are carried along
   for symmetry and never appear in the expansion.

   The reserved spelling __asm__ is used so that the macros also work when
   the compiler runs in a strict ISO mode (for example -std=c11), where the
   plain `asm` keyword is not available.  */
# define LOAD_ARGS_0()
# define LOAD_REGS_0
# define ASM_ARGS_0

# define LOAD_ARGS_TYPES_1(t1, a1)                                         \
  t1 __arg1 = (t1) (a1);                                                   \
  LOAD_ARGS_0 ()
# define LOAD_REGS_TYPES_1(t1, a1)                                         \
  register t1 _a1 __asm__ ("rdi") = __arg1;                                \
  LOAD_REGS_0
# define ASM_ARGS_1    ASM_ARGS_0, "r" (_a1)
# define LOAD_ARGS_1(a1)                                                   \
  LOAD_ARGS_TYPES_1 (long int, a1)
# define LOAD_REGS_1                                                       \
  LOAD_REGS_TYPES_1 (long int, a1)

# define LOAD_ARGS_TYPES_2(t1, a1, t2, a2)                                 \
  t2 __arg2 = (t2) (a2);                                                   \
  LOAD_ARGS_TYPES_1 (t1, a1)
# define LOAD_REGS_TYPES_2(t1, a1, t2, a2)                                 \
  register t2 _a2 __asm__ ("rsi") = __arg2;                                \
  LOAD_REGS_TYPES_1 (t1, a1)
# define ASM_ARGS_2    ASM_ARGS_1, "r" (_a2)
# define LOAD_ARGS_2(a1, a2)                                               \
  LOAD_ARGS_TYPES_2 (long int, a1, long int, a2)
# define LOAD_REGS_2                                                       \
  LOAD_REGS_TYPES_2 (long int, a1, long int, a2)

# define LOAD_ARGS_TYPES_3(t1, a1, t2, a2, t3, a3)                         \
  t3 __arg3 = (t3) (a3);                                                   \
  LOAD_ARGS_TYPES_2 (t1, a1, t2, a2)
# define LOAD_REGS_TYPES_3(t1, a1, t2, a2, t3, a3)                         \
  register t3 _a3 __asm__ ("rdx") = __arg3;                                \
  LOAD_REGS_TYPES_2 (t1, a1, t2, a2)
# define ASM_ARGS_3    ASM_ARGS_2, "r" (_a3)
# define LOAD_ARGS_3(a1, a2, a3)                                           \
  LOAD_ARGS_TYPES_3 (long int, a1, long int, a2, long int, a3)
# define LOAD_REGS_3                                                       \
  LOAD_REGS_TYPES_3 (long int, a1, long int, a2, long int, a3)

# define LOAD_ARGS_TYPES_4(t1, a1, t2, a2, t3, a3, t4, a4)                 \
  t4 __arg4 = (t4) (a4);                                                   \
  LOAD_ARGS_TYPES_3 (t1, a1, t2, a2, t3, a3)
# define LOAD_REGS_TYPES_4(t1, a1, t2, a2, t3, a3, t4, a4)                 \
  register t4 _a4 __asm__ ("r10") = __arg4;                                \
  LOAD_REGS_TYPES_3 (t1, a1, t2, a2, t3, a3)
# define ASM_ARGS_4    ASM_ARGS_3, "r" (_a4)
# define LOAD_ARGS_4(a1, a2, a3, a4)                                       \
  LOAD_ARGS_TYPES_4 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4)
# define LOAD_REGS_4                                                       \
  LOAD_REGS_TYPES_4 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4)

# define LOAD_ARGS_TYPES_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5)         \
  t5 __arg5 = (t5) (a5);                                                   \
  LOAD_ARGS_TYPES_4 (t1, a1, t2, a2, t3, a3, t4, a4)
# define LOAD_REGS_TYPES_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5)         \
  register t5 _a5 __asm__ ("r8") = __arg5;                                 \
  LOAD_REGS_TYPES_4 (t1, a1, t2, a2, t3, a3, t4, a4)
# define ASM_ARGS_5    ASM_ARGS_4, "r" (_a5)
# define LOAD_ARGS_5(a1, a2, a3, a4, a5)                                   \
  LOAD_ARGS_TYPES_5 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4, long int, a5)
# define LOAD_REGS_5                                                       \
  LOAD_REGS_TYPES_5 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4, long int, a5)

# define LOAD_ARGS_TYPES_6(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5, t6, a6) \
  t6 __arg6 = (t6) (a6);                                                   \
  LOAD_ARGS_TYPES_5 (t1, a1, t2, a2, t3, a3, t4, a4, t5, a5)
# define LOAD_REGS_TYPES_6(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5, t6, a6) \
  register t6 _a6 __asm__ ("r9") = __arg6;                                 \
  LOAD_REGS_TYPES_5 (t1, a1, t2, a2, t3, a3, t4, a4, t5, a5)
# define ASM_ARGS_6    ASM_ARGS_5, "r" (_a6)
# define LOAD_ARGS_6(a1, a2, a3, a4, a5, a6)                               \
  LOAD_ARGS_TYPES_6 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4, long int, a5, long int, a6)
# define LOAD_REGS_6                                                       \
  LOAD_REGS_TYPES_6 (long int, a1, long int, a2, long int, a3,             \
                     long int, a4, long int, a5, long int, a6)
/* Issue the system call called NAME.  NAME is pasted onto the __NR_ prefix
   to form the system call number macro, and everything is forwarded to
   INTERNAL_SYSCALL_NCS: ERR, the argument count NR, and the arguments.  */
#define INTERNAL_SYSCALL(name, err, nr, args...) \
  INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)


/* x86_64 */
/* Execute the `syscall` instruction for system call number NAME with NR
   arguments ARGS.

   LOAD_ARGS_<NR> copies the arguments into locals, LOAD_REGS_<NR> binds
   them to the argument registers, and ASM_ARGS_<NR> lists them as asm
   inputs.  The number NAME goes in through the "0" matching constraint, i.e.
   in the same register as the "=a" output that receives the result.
   "memory", "cc", "r11" and "cx" are declared clobbered.

   ERR is accepted for interface compatibility and not used.  The macro
   evaluates to the content of the output register converted to long int; no
   error decoding is done here.

   The reserved spellings __asm__ / __volatile__ are used so that the macro
   also works in strict ISO modes (for example -std=c11), matching the
   spelling atomic_exchange_acq in this file already relies on.  */
#define INTERNAL_SYSCALL_NCS(name, err, nr, args...)    \
    ({                                                  \
        unsigned long int resultvar;                    \
        LOAD_ARGS_##nr (args)                           \
        LOAD_REGS_##nr                                  \
        __asm__ __volatile__ (                          \
            "syscall\n\t"                               \
            : "=a" (resultvar)                          \
            : "0" (name) ASM_ARGS_##nr                  \
            : "memory", "cc", "r11", "cx"               \
        );                                              \
        (long int) resultvar;                           \
    })


/* Placeholder for declaring the error variable ERR used with
   INTERNAL_SYSCALL.  It expands to nothing: INTERNAL_SYSCALL_NCS in this
   file never reads or writes its ERR argument.  */
#define INTERNAL_SYSCALL_DECL(err)
/* Wait while *FUTEXP == VAL for an lll_futex_wake call on FUTEXP.
   This is lll_futex_timed_wait with a NULL timeout argument.  */
#define lll_futex_wait(futexp, val, private) \
  lll_futex_timed_wait ((futexp), (val), NULL, (private))
/* Perform the futex system call (through INTERNAL_SYSCALL) with operation
   FUTEX_WAIT on FUTEXP, passing VAL as the third and TIMESPEC as the fourth
   argument.  Evaluates to the long int value produced by INTERNAL_SYSCALL.
   PRIVATE is accepted but does not appear in the expansion.  */
#define lll_futex_timed_wait(futexp, val, timespec, private)              \
  ({                                                                      \
    INTERNAL_SYSCALL_DECL (__err);                                        \
    long int __status = INTERNAL_SYSCALL (futex, __err, 4,                \
                                          (futexp), FUTEX_WAIT,           \
                                          (val), (timespec));             \
    __status;                                                             \
  })

/* Perform the futex system call (through INTERNAL_SYSCALL) with operation
   FUTEX_WAKE on FUTEXP, passing NR as the third argument and 0 as the
   fourth.  Evaluates to the long int value produced by INTERNAL_SYSCALL.
   PRIVATE is accepted but does not appear in the expansion.  */
#define lll_futex_wake(futexp, nr, private)                               \
  ({                                                                      \
    INTERNAL_SYSCALL_DECL (__err);                                        \
    long int __status = INTERNAL_SYSCALL (futex, __err, 4,                \
                                          (futexp), FUTEX_WAKE,           \
                                          (nr), 0);                       \
    __status;                                                             \
  })


/* Slow path of __lll_lock for the LLL_PRIVATE case: block until the lock
   word *FUTEX has been taken by the calling thread.

   If the word already reads 2 on entry, the thread waits right away.  After
   that it repeatedly swaps 2 into the word: a previous value of 0 means
   another thread had released the lock, so the swap has just acquired it
   (leaving the word at 2) and the function returns.  Any other previous
   value means the lock is still held, so the thread waits again.  */
void __lll_lock_wait_private (int *futex)
{
    int must_sleep = (*futex == 2);

    for (;;) {
        if (must_sleep)
            lll_futex_wait (futex, 2, LLL_PRIVATE); /* Wait if *futex == 2.  */

        /* Store 2 and look at what was there before.  */
        if (atomic_exchange_acq (futex, 2) == 0)
            return;

        must_sleep = 1;
    }
}


/* These functions don't get included in libc.so  */
/* Slow path of __lll_lock when PRIVATE is not a compile-time LLL_PRIVATE;
   the test program in this file never reaches it.

   Same protocol as __lll_lock_wait_private, with PRIVATE forwarded to
   lll_futex_wait: wait immediately if *FUTEX already reads 2, then keep
   swapping 2 into the word until the previous value is 0, waiting after
   every unsuccessful swap.  */
void __lll_lock_wait (int *futex, int private)
{
  int must_sleep = (*futex == 2);

  for (;;)
    {
      if (must_sleep)
        lll_futex_wait (futex, 2, private); /* Wait if *futex == 2.  */

      if (atomic_exchange_acq (futex, 2) == 0)
        return;

      must_sleep = 1;
    }
}


/* Test */
/* Workload size: TASK_NUM threads, each doing COUNT locked increments.  */
enum { TASK_NUM = 20, COUNT = 100 };

static int sum;   /* Counter incremented by the workers while holding the lock.  */
static int futex; /* Lock word handed to lll_lock/lll_unlock; starts at 0.  */
static int go;    /* Start flag: workers spin until it becomes non-zero.  */

/* Worker thread body.

   ARG carries the worker's index (an integer smuggled through the void *).
   The thread names itself with the single letter 'A' + index via
   prctl(PR_SET_NAME), waits for the start flag `go`, and then increments
   `sum` COUNT times, taking the lock `futex` around every increment.
   Always returns NULL.  */
static void *fn(void *arg)
{
    const unsigned long int index = (unsigned long int) arg;
    char name[2] = { (char) ('A' + index), '\0' };
    int i;

    prctl(PR_SET_NAME, name);

    /* The flag is written by another thread, so it must be re-read from
       memory on every iteration.  With a plain `while (!go);` the compiler
       is allowed to read `go` once and spin forever when optimising.  */
    while (!__atomic_load_n(&go, __ATOMIC_ACQUIRE)) {
        /* spin until main releases the workers */
    }

    for (i = 0; i < COUNT; i++) {
        lll_lock(futex, LLL_PRIVATE);
        sum++;
        lll_unlock(futex, LLL_PRIVATE);
    }

    return NULL;
}

/* Test driver: start TASK_NUM workers running fn, release them all at once
   through the `go` flag, wait for them to finish and check that `sum` equals
   TASK_NUM * COUNT.

   Returns 0 once the result has been printed (whether it says PASS or FAIL),
   and 1 if not all worker threads could be created.  */
int main(int argc, char **argv)
{
    pthread_t task[TASK_NUM];
    unsigned long int created = 0;
    unsigned long int i;

    (void) argc;
    (void) argv;

    for (i = 0; i < TASK_NUM; i++) {
        int rc = pthread_create(&task[i], NULL, fn, (void *) i);

        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for worker %lu (error %d)\n",
                    i, rc);
            break;
        }
        created++;
    }

    /* Publish the start signal with an atomic store; it is also set when
       creation failed part-way, so the workers that do exist can finish.  */
    __atomic_store_n(&go, 1, __ATOMIC_RELEASE);

    /* Join only the threads that were really created: task[] entries beyond
       `created` were never initialised.  */
    for (i = 0; i < created; i++)
        pthread_join(task[i], NULL);

    if (created != TASK_NUM)
        return 1;

    printf("[Main pid=%d] sum=%d (should be %d)\n", getpid(), sum, TASK_NUM*COUNT);
    printf("Test %s\n", sum==TASK_NUM*COUNT ? "PASS" : "FAIL");

    return 0;
}

运行结果

[Main pid=6674] sum=2000 (should be 2000)
Test PASS

 

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值