线程和锁

# 12.线程和锁

# 目录介绍

12.1 线程
12.2 多线程
12.3 线程同步和锁

# 12.1 线程

在 C 语言中，线程是通过 POSIX 线程库（pthread） 来实现的。pthread 是 Unix/Linux 系统下的标准线程库，提供了创建、管理和同步线程的 API。

# 12.1.1 基本概念

线程：线程是程序执行的最小单元，一个进程可以包含多个线程，线程共享进程的内存空间。
多线程：多线程允许程序同时执行多个任务，提高程序的并发性和效率。

# 12.1.2 pthread介绍

使用 pthread 需要包含以下头文件，并在编译时加上 -pthread（或 -lpthread）选项：

#include <pthread.h>

使用 pthread_create 创建线程。
使用 pthread_join 等待线程结束。
通过 pthread_attr_t 设置线程属性。
线程函数可以通过 return 或 pthread_exit 返回结果。

# 12.1.3 创建线程

使用 pthread_create 函数创建线程：

int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg);

参数：

thread：指向线程标识符的指针。
attr：线程属性，通常为 NULL（使用默认属性）。
start_routine：线程执行的函数。
arg：传递给线程函数的参数。

返回值：成功返回 0，失败返回错误码。

#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Thread entry point: prints the int that arg points to, pauses for two
 * seconds to simulate work, then reports completion.
 *
 * arg must point to an int that stays valid for the duration of the call.
 * Always returns NULL.
 */
void *thread_function(void *arg) {
    const int received = *(const int *)arg;

    printf("Thread is running, value = %d\n", received);
    sleep(2); /* simulated workload */
    printf("Thread is done\n");

    return NULL;
}

/*
 * Creates one worker thread, passes it the address of a local int, and
 * waits for the worker to finish.
 *
 * pthread_create and pthread_join report failure through their return
 * value and do not set errno, so the diagnostic is built from the returned
 * code with strerror() rather than with perror().
 *
 * Returns 0 on success, 1 if the thread could not be created or joined.
 */
int main(void) {
    pthread_t thread;
    int value = 42; /* stays alive while the worker runs: main joins before returning */
    int err;

    /* Start the worker. */
    err = pthread_create(&thread, NULL, thread_function, &value);
    if (err != 0) {
        fprintf(stderr, "Failed to create thread: %s\n", strerror(err));
        return 1;
    }

    /* Block until the worker has finished. */
    err = pthread_join(thread, NULL);
    if (err != 0) {
        fprintf(stderr, "Failed to join thread: %s\n", strerror(err));
        return 1;
    }

    printf("Main thread is done\n");
    return 0;
}

# 12.1.4 线程的终止

线程函数执行完毕后，线程会自动终止。
使用 pthread_cancel 可以请求终止另一个线程。注意它只是发出取消请求：默认情况下，目标线程运行到取消点（如 sleep、read、pthread_testcancel 等）时才会真正终止：

pthread_cancel(thread);

# 12.1.5 线程分离

使用 pthread_detach 将线程设置为分离状态，线程结束后系统会自动回收其资源。分离后的线程不能再用 pthread_join 等待，也无法获取其返回值：

pthread_detach(thread);

# 12.1.6 线程属性

可以通过 pthread_attr_t 设置线程属性，如栈大小、调度策略等。示例代码

pthread_attr_t attr;
pthread_attr_init(&attr); // initialize attr before any setter is applied to it
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); // request that the new thread start detached
// NOTE(review): NULL is passed as arg here; the thread_function shown earlier
// dereferences arg, so confirm the routine used with this snippet accepts NULL.
pthread_create(&thread, &attr, thread_function, NULL);
pthread_attr_destroy(&attr); // release the attribute object after the create call

# 12.1.7 线程返回值

线程函数可以通过 return 或 pthread_exit 返回结果，主线程使用 pthread_join 获取返回值。

示例代码

/*
 * Thread entry point that hands a heap-allocated int (value 42) back to
 * the joining thread through pthread_exit.
 *
 * arg is unused. The joiner receives the pointer via pthread_join and is
 * responsible for calling free() on it. If the allocation fails, the thread
 * exits with NULL instead of writing through a null pointer.
 */
void *thread_function(void *arg) {
    (void)arg;

    int *result = malloc(sizeof *result);
    if (result == NULL) {
        pthread_exit(NULL); /* signal "no result" to the joiner */
    }

    *result = 42;
    pthread_exit(result); /* hand ownership of the allocation to the joiner */
}

/*
 * Starts one worker, joins it, and prints the int the worker returned.
 *
 * The worker's return value is a heap pointer owned by this function after
 * the join; it is freed here. A NULL return value is treated as "the worker
 * produced no result" rather than being dereferenced.
 *
 * Returns 0 on success, 1 if the thread could not be created or joined or
 * if it returned no result.
 */
int main(void) {
    pthread_t thread;
    void *retval = NULL;

    if (pthread_create(&thread, NULL, thread_function, NULL) != 0) {
        fprintf(stderr, "Failed to create thread\n");
        return 1;
    }
    if (pthread_join(thread, &retval) != 0) {
        fprintf(stderr, "Failed to join thread\n");
        return 1;
    }
    if (retval == NULL) {
        fprintf(stderr, "Thread returned no result\n");
        return 1;
    }

    printf("Thread returned: %d\n", *(int *)retval);
    free(retval);
    return 0;
}

# 12.1.8 综合案例与思考

综合案例：线程的创建、参数传递与返回值

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

/* Input for one worker: the inclusive integer range [start, end] to add up. */
typedef struct {
    int id;    /* label printed by the worker and copied into its result */
    int start; /* first integer of the range (inclusive) */
    int end;   /* last integer of the range (inclusive) */
} TaskParam;

/* Output of one worker; allocated by calc_sum and freed by the caller. */
typedef struct {
    int thread_id; /* copy of TaskParam.id */
    long sum;      /* sum of all integers in [start, end] */
} TaskResult;

/*
 * Thread entry point: adds up every integer in [param->start, param->end]
 * and prints the partial sum.
 *
 * arg must point to a TaskParam that stays valid for the whole call.
 * Returns a malloc'ed TaskResult that the caller must free(), or NULL if
 * the allocation failed. An empty range (start > end) yields a sum of 0.
 */
void *calc_sum(void *arg) {
    const TaskParam *param = arg;

    TaskResult *result = malloc(sizeof *result);
    if (result == NULL) {
        return NULL; /* let the joiner see that no result was produced */
    }

    result->thread_id = param->id;
    result->sum = 0;
    for (int i = param->start; i <= param->end; i++) {
        result->sum += i;
    }

    printf("线程%d: 计算 %d~%d 的和 = %ld\n",
           param->id, param->start, param->end, result->sum);

    return result;
}

/*
 * Splits the sum 1..1000 across four worker threads running calc_sum and
 * adds up their partial results.
 *
 * Only threads that were actually created are joined, and a worker's
 * result is used only after a successful join that produced a non-NULL
 * pointer. The join target is a plain void * so that pthread_join writes
 * through the pointer type it expects.
 *
 * Returns 0 on success, 1 if any thread could not be created or joined or
 * returned no result.
 */
int main(void) {
    enum { NUM_THREADS = 4 }; /* single source for the array sizes and loops */
    const int total = 1000;
    const int chunk = total / NUM_THREADS;

    pthread_t threads[NUM_THREADS];
    TaskParam params[NUM_THREADS];
    int started = 0; /* number of threads successfully created */
    int failed = 0;

    printf("=== 多线程并行计算 ===\n");
    for (int i = 0; i < NUM_THREADS; i++) {
        params[i].id = i + 1;
        params[i].start = i * chunk + 1;
        /* The last worker takes whatever remains when total is not a multiple of the thread count. */
        params[i].end = (i == NUM_THREADS - 1) ? total : (i + 1) * chunk;

        int err = pthread_create(&threads[i], NULL, calc_sum, &params[i]);
        if (err != 0) {
            fprintf(stderr, "pthread_create failed for worker %d (error %d)\n",
                    i + 1, err);
            failed = 1;
            break;
        }
        started++;
    }

    /* Collect the partial sums of every worker that was started. */
    long total_sum = 0;
    for (int i = 0; i < started; i++) {
        void *ret = NULL;
        int err = pthread_join(threads[i], &ret);
        if (err != 0) {
            fprintf(stderr, "pthread_join failed for worker %d (error %d)\n",
                    i + 1, err);
            failed = 1;
            continue;
        }
        if (ret == NULL) {
            fprintf(stderr, "worker %d returned no result\n", i + 1);
            failed = 1;
            continue;
        }

        TaskResult *result = ret;
        total_sum += result->sum;
        free(result);
    }

    if (failed) {
        return 1;
    }

    printf("\n总和: %ld (验证: %d)\n", total_sum, total * (total + 1) / 2);

    return 0;
}
// 编译: gcc -o thread_sum thread_sum.c -lpthread

原理说明：线程共享进程的地址空间（堆、全局变量、代码段），但每个线程有独立的栈。传递参数给线程函数时要注意：如果传递局部变量的地址，必须确保该变量在线程使用期间不会被销毁或修改。线程返回值通过 pthread_join 的第二个参数获取，返回的是 void *，通常指向动态分配的内存（由接收方负责释放）。创建线程有开销，任务太小时使用多线程反而比单线程慢。

思考题：

如果在循环中 pthread_create(&threads[i], NULL, calc_sum, &i) 直接传 &i，会有什么问题？
pthread_join 是阻塞调用，如果不调用 join 也不 detach，会发生什么（线程资源泄漏）？
多线程程序中，printf 是线程安全的吗？多个线程同时 printf 输出会混乱吗？

# 12.2 多线程

# 12.2.1 多线程使用

多线程编程允许程序同时执行多个任务。常见的使用场景包括：

并行计算：将大任务拆分为多个子任务，各线程并行执行。
I/O密集型任务：一个线程等待I/O时，其他线程可以继续运行。
GUI程序：主线程处理用户交互，后台线程处理耗时操作。

#include <stdio.h>
#include <pthread.h>

/*
 * Thread entry point: prints the worker id that arg points to.
 *
 * arg must point to a valid int. Always returns NULL.
 */
void *task(void *arg) {
    const int *id_ptr = arg;

    printf("线程 %d 正在执行\n", *id_ptr);
    return NULL;
}

/*
 * Launches three workers running task, each with its own id slot, then
 * waits for all of them in creation order.
 */
int main() {
    int ids[] = {1, 2, 3};
    pthread_t threads[3];
    int n = 0;

    /* Each worker gets the address of a distinct array element. */
    while (n < 3) {
        pthread_create(&threads[n], NULL, task, &ids[n]);
        n++;
    }

    for (pthread_t *t = threads; t != threads + 3; ++t) {
        pthread_join(*t, NULL);
    }

    return 0;
}

# 12.2.2 多线程问题

多线程编程中常见的问题包括：

竞态条件（Race Condition）：多个线程同时读写共享数据，结果取决于执行顺序。
死锁（Deadlock）：两个线程互相等待对方持有的锁，导致程序永远阻塞。
数据竞争（Data Race）：多个线程同时访问同一内存位置，至少有一个是写操作。

// Race-condition demo: several threads increment one shared variable
// without any synchronization.
int counter = 0;  // shared by every thread that runs increment

/*
 * Thread entry point: increments the shared counter 100000 times with no
 * locking. arg is unused. Always returns NULL.
 */
void *increment(void *arg) {
    for (int i = 0; i < 100000; i++) {
        counter++;  // not atomic: concurrent read-modify-write can lose updates
    }
    return NULL;
}
// With two threads each doing 100000 increments the expected total is
// 200000, but the observed value may be smaller.

# 12.2.3 思考和疑惑

常见的多线程疑问：

线程越多越好吗？ 不是。线程数超过CPU核心数后，频繁的上下文切换反而降低性能。对于 CPU 密集型任务，线程数通常设为CPU核心数或核心数+1；对于 I/O 密集型任务，由于线程大部分时间在等待，线程数可以适当多于核心数。
线程和进程的区别？ 进程有独立的地址空间，线程共享进程地址空间。线程创建和切换的开销比进程小得多。
如何避免竞态条件？ 使用互斥锁、原子操作、读写锁等同步机制保护共享数据。

# 12.3 线程同步和锁

多线程程序中，线程之间可能会竞争共享资源，导致数据不一致。常用的同步机制包括 互斥锁 和 条件变量。

# 12.3.1 互斥锁Mutex

互斥锁用于保护共享资源，确保同一时间只有一个线程访问资源。

#include <stdio.h>
#include <pthread.h>

int shared_value = 0;                              /* counter shared by all workers */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; /* guards shared_value */

/*
 * Thread entry point: adds 1 to shared_value 100000 times, taking the
 * mutex around every single increment.
 *
 * arg is unused. Always returns NULL.
 */
void *thread_function(void *arg) {
    int remaining = 100000;

    while (remaining-- > 0) {
        pthread_mutex_lock(&mutex);
        shared_value += 1;
        pthread_mutex_unlock(&mutex);
    }

    return NULL;
}

/*
 * Runs two workers that increment shared_value under the mutex, waits for
 * both, and prints the final count.
 */
int main() {
    pthread_t workers[2];

    for (int i = 0; i < 2; i++) {
        pthread_create(&workers[i], NULL, thread_function, NULL);
    }
    for (int i = 0; i < 2; i++) {
        pthread_join(workers[i], NULL);
    }

    printf("Shared value = %d\n", shared_value);
    return 0;
}

# 12.3.2 条件变量

条件变量用于线程之间的通信，通常与互斥锁一起使用。示例代码

#include <stdio.h>
#include <pthread.h>

int ready = 0;                                     /* predicate the consumer waits on */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; /* guards ready */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;    /* signalled once ready is set */

/*
 * Thread entry point: marks the data as ready and signals the condition
 * variable, all while holding the mutex.
 *
 * arg is unused. Always returns NULL.
 */
void *producer(void *arg) {
    pthread_mutex_lock(&mutex);
    {
        ready = 1;
        printf("Producer: Data is ready\n");
        pthread_cond_signal(&cond); /* wake a waiting consumer, if any */
    }
    pthread_mutex_unlock(&mutex);

    return NULL;
}

/*
 * Thread entry point: blocks on the condition variable until ready is
 * non-zero, then reports that the data was consumed.
 *
 * The predicate is re-tested after every wake-up, so a wake-up that arrives
 * while ready is still zero simply goes back to waiting.
 * arg is unused. Always returns NULL.
 */
void *consumer(void *arg) {
    pthread_mutex_lock(&mutex);

    for (;;) {
        if (ready) {
            break;
        }
        pthread_cond_wait(&cond, &mutex); /* releases the mutex while blocked */
    }

    printf("Consumer: Data is consumed\n");
    pthread_mutex_unlock(&mutex);

    return NULL;
}

int main() {
    pthread_t thread1, thread2;

    pthread_create(&thread1, NULL, consumer, NULL);
    pthread_create(&thread2, NULL, producer, NULL);

    pthread_join(thread1, NULL);
    pthread_join(thread2, NULL);

    return 0;
}

# 12.3.3 综合案例与思考

综合案例：生产者-消费者模型

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

#define BUFFER_SIZE 5 // capacity of the shared buffer, in items

// Shared buffer state; the producer and consumer functions in this example
// touch it only while holding mutex.
int buffer[BUFFER_SIZE];
int count = 0;                // number of items currently stored
int in_idx = 0, out_idx = 0;  // next slot to write / next slot to read

pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;     // guards buffer, count, in_idx, out_idx
pthread_cond_t not_full = PTHREAD_COND_INITIALIZER;    // producers wait on this while count == BUFFER_SIZE
pthread_cond_t not_empty = PTHREAD_COND_INITIALIZER;   // consumers wait on this while count == 0

/*
 * Producer thread entry point: generates eight items (id * 100 + sequence
 * number) and appends each one to the shared buffer.
 *
 * Waits on not_full while the buffer is at capacity and signals not_empty
 * after every insertion. arg must point to the producer's int id.
 * Always returns NULL.
 */
void *producer(void *arg) {
    const int id = *(int *)arg;
    int produced = 0;

    while (produced < 8) {
        const int item = id * 100 + produced;

        pthread_mutex_lock(&mutex);

        /* Buffer full: sleep until a consumer frees a slot. */
        while (count == BUFFER_SIZE) {
            pthread_cond_wait(&not_full, &mutex);
        }

        buffer[in_idx] = item;
        if (++in_idx == BUFFER_SIZE) {
            in_idx = 0; /* wrap around to the first slot */
        }
        count += 1;
        printf("生产者%d: 生产 %d (缓冲区: %d/%d)\n",
               id, item, count, BUFFER_SIZE);

        pthread_cond_signal(&not_empty); /* tell a consumer there is data */
        pthread_mutex_unlock(&mutex);

        usleep(100000); /* simulated production time */
        produced++;
    }

    return NULL;
}

/*
 * Consumer thread entry point: removes eight items from the shared buffer,
 * printing each one.
 *
 * Waits on not_empty while the buffer holds nothing and signals not_full
 * after every removal. arg must point to the consumer's int id.
 * Always returns NULL.
 */
void *consumer(void *arg) {
    const int id = *(int *)arg;
    int consumed = 0;

    while (consumed < 8) {
        pthread_mutex_lock(&mutex);

        /* Buffer empty: sleep until a producer adds an item. */
        while (count == 0) {
            pthread_cond_wait(&not_empty, &mutex);
        }

        const int item = buffer[out_idx];
        if (++out_idx == BUFFER_SIZE) {
            out_idx = 0; /* wrap around to the first slot */
        }
        count -= 1;
        printf("消费者%d: 消费 %d (缓冲区: %d/%d)\n",
               id, item, count, BUFFER_SIZE);

        pthread_cond_signal(&not_full); /* tell a producer there is room */
        pthread_mutex_unlock(&mutex);

        usleep(150000); /* simulated consumption time */
        consumed++;
    }

    return NULL;
}

/*
 * Starts two producers and two consumers, waits for all four, then
 * destroys the mutex and both condition variables.
 */
int main() {
    int prod_ids[] = {1, 2};
    int cons_ids[] = {1, 2};
    pthread_t prod_threads[2], cons_threads[2];
    int k;

    printf("=== 生产者-消费者模型 ===\n");

    /* Start the threads pairwise: producer k, then consumer k. */
    k = 0;
    while (k < 2) {
        pthread_create(&prod_threads[k], NULL, producer, &prod_ids[k]);
        pthread_create(&cons_threads[k], NULL, consumer, &cons_ids[k]);
        k++;
    }

    k = 0;
    while (k < 2) {
        pthread_join(prod_threads[k], NULL);
        pthread_join(cons_threads[k], NULL);
        k++;
    }

    /* All threads have been joined, so nothing uses these any longer. */
    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&not_full);
    pthread_cond_destroy(&not_empty);

    printf("\n所有线程完成\n");
    return 0;
}
// 编译: gcc -o prodcons prodcons.c -lpthread

原理说明：生产者-消费者是多线程编程中最经典的模型。核心要素：1）互斥锁保护共享缓冲区，确保同一时刻只有一个线程修改数据；2）条件变量 not_full 和 not_empty 实现线程间的通知机制——生产者在缓冲区满时等待，消费者在缓冲区空时等待；3）用 while 而非 if 检查条件，防止虚假唤醒（spurious wakeup）。环形缓冲区用取模运算实现，避免了数据移动的开销。

思考题：

条件变量的 wait 为什么要放在 while 循环中而不是 if 中？什么是虚假唤醒？
如果把 pthread_cond_signal 换成 pthread_cond_broadcast，会有什么影响？什么场景下应该用 broadcast？
除了互斥锁+条件变量，还有哪些线程同步机制？信号量（semaphore）和互斥锁有什么区别？

上次更新: 2026/06/28, 17:55:19

← 结构体 | 预处理器 →