我有以下程序(缩写)来计算每个字母在使用线程的文件中出现的次数:
#define N_LETTERS 26U
#define IS_LETTER(x) (x >= 'A' && x <= 'Z')
#define HASH(x) toupper(x) - 'A'
typedef unsigned int uint;
typedef struct {
uint nt;
uint nc;
char filename[128];
} Args;
Args args = {0};
char *file_contents = NULL;
uint file_size;
uint chars_per_thread;
pthread_mutex_t mutex;
uint letter_count[N_LETTERS] = {0};
void get_program_args(int argc, char **argv) {
...
}
uint read_file(char *filename, char **buffer) {
....
return file_size;
}
void *count_letters(void *arg) {
uint start = *((int *)arg);
uint end = (start + chars_per_thread) >= file_size ? file_size : (start + chars_per_thread);
uint count[N_LETTERS] = {0};
for (uint i = start; i < end; i++) {
char c = toupper(file_contents[i]);
if (IS_LETTER(c)) {
count[HASH(c)]++;
}
}
pthread_mutex_lock(&mutex);
for (int i = 0; i < N_LETTERS; i++) {
letter_count[i] += count[i];
}
pthread_mutex_unlock(&mutex);
return NULL;
}
float letter_sum() {
...
return sum;
}
void print_letter_count() {
...
}
int main(int argc, char **argv) {
uint chars_counted = 0;
get_program_args(argc, argv);
file_size = read_file(args.filename, &file_contents);
pthread_mutex_init(&mutex, NULL);
pthread_t *threads = malloc(args.nt * sizeof(pthread_t));
if (threads == NULL) {
printf("Error allocating memory. \n");
exit(EXIT_FAILURE);
}
chars_per_thread = (file_size / args.nt) > args.nc ? args.nc : (file_size / args.nt);
while (chars_counted < file_size) {
for (int i = 0; i < args.nt; i++) {
uint start = chars_counted;
if (start >= file_size)
break;
pthread_create(&threads[i], NULL, count_letters, &start);
chars_counted += chars_per_thread;
}
for (int i = 0; i < args.nt; i++) {
pthread_join(threads[i], NULL);
}
}
print_letter_count();
pthread_mutex_destroy(&mutex);
free(threads);
free(file_contents); // free allocated memory
return 0;
}
每次我使用超过 1 个线程运行我的程序时,我都会得到不同的答案,它看起来就像我有一个竞争条件,但我找不到它。有人可以帮助我吗?
我尝试使用 mutex_lock 来修复它,但它仍然有同样的问题。
不确定为什么@yano 没有将他们的评论作为答案,但这就是问题所在。当你启动它时,你将一个指向
start
的指针传递给每个线程,然后你去更改 start
的值,同时该线程正在尝试读取它!您需要一个起始值数组,并将一个指针传递给该线程自己的个人副本,该副本不会更改。
[正如其他人所提到的],竞争条件是
start
可以在main
中[更快]改变,而不是在count_letters
中检索。
那是因为我们传递一个指针给
start
。
[至少]有两种方法可以解决这个...
start
按值 [而不是按指针].struct
(例如struct mytask
),它具有所需的所有/许多值并传递一个指针给它。按价值...
在
main
,改变:
pthread_create(&threads[i], NULL, count_letters, &start);
进入:
pthread_create(&threads[i], NULL, count_letters, (void *) (uintptr_t) start);
并且,在
count_letters
中,改变:
uint start = *((int *) arg);
进入:
uint start = (uintptr_t) arg;
因为您已经在为一组
malloc
做pthread_t
,所以让我们将其更改为struct
.
这个比较笼统。在上面的例子中,我们只能传递one值。但是,有了
struct
,我们可以想传多少就传多少:
typedef struct {
pthread_t tid;
uint start;
uint end;
// more values [if we wish] ...
} mytask;
void *
count_letters(void *arg)
{
mytask *tsk = arg;
uint count[N_LETTERS] = { 0 };
for (uint i = tsk->start; i < tsk->end; i++) {
char c = toupper(file_contents[i]);
if (IS_LETTER(c)) {
count[HASH(c)]++;
}
}
pthread_mutex_lock(&mutex);
for (int i = 0; i < N_LETTERS; i++) {
letter_count[i] += count[i];
}
pthread_mutex_unlock(&mutex);
return NULL;
}
int
main(int argc, char **argv)
{
uint chars_counted = 0;
mytask *threads = malloc(args.nt * sizeof(*threads));
if (threads == NULL) {
printf("Error allocating memory. \n");
exit(EXIT_FAILURE);
}
// ...
while (chars_counted < file_size) {
for (int i = 0; i < args.nt; i++) {
mytask *tsk = &threads[i];
tsk->start = chars_counted;
if (tsk->start >= file_size)
break;
tsk->end = (tsk->start + chars_per_thread) >= file_size ?
file_size : (tsk->start + chars_per_thread);
// add more values ...
pthread_create(&tsk->tid, NULL, count_letters, tsk);
chars_counted += chars_per_thread;
}
for (int i = 0; i < args.nt; i++) {
mytask *tsk = &threads[i];
pthread_join(tsk->tid, NULL);
}
}
// ...
return 0;
}