我正在尝试用 OpenMP 并行化模糊 c-means(无监督)聚类算法,并且已经实现了并行版本。问题是:使用 16/32 个线程时,性能本应比使用 8/4 个线程时更好,但实际情况恰恰相反,性能反而变得更差。我使用的线程数并没有超过核心数。我知道这是由竞争条件造成的,但此刻我的大脑已经停止工作了,哈哈。并行化后的代码如下: ''''
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include <time.h>
// Compile-time capacities of the global tables below. init() rejects any
// input whose sizes exceed these limits.
#define MAX_DATA_POINTS 10000
#define MAX_CLUSTER 100
#define MAX_DATA_DIMENSION 5
// Actual problem sizes, read from the first line of the input file by init().
int num_data_points;
int num_clusters;
int num_dimensions;
// low_high[d][0] / low_high[d][1]: smallest / largest coordinate seen in
// dimension d while init() reads the data points.
double low_high[MAX_DATA_DIMENSION][2];
// degree_of_memb[i][j]: membership of data point i in cluster j. Initialised
// randomly by init() and rewritten by update_degree_of_membership().
double degree_of_memb[MAX_DATA_POINTS][MAX_CLUSTER];
// Termination threshold: fcm() keeps iterating while the largest membership
// change exceeds epsilon. init() requires 0 < epsilon <= 1.
double epsilon;
// Fuzziness exponent applied to memberships; init() requires it to be > 1.
double fuzziness;
// data_point[i][k]: coordinate k of data point i, as read from the input file.
double data_point[MAX_DATA_POINTS][MAX_DATA_DIMENSION];
// cluster_centre[j][k]: coordinate k of the centre of cluster j, recomputed
// by calculate_centre_vectors().
double cluster_centre[MAX_CLUSTER][MAX_DATA_DIMENSION];
// norms[i][j]: Euclidean distance between data point i and cluster centre j,
// filled by precompute_norms().
double norms[MAX_DATA_POINTS][MAX_CLUSTER];
// Load the problem description from `fname` and set up the initial state.
//
// File layout (whitespace separated), in the order it is read below:
//   <num_data_points> <num_clusters> <num_dimensions>
//   <fuzziness> <epsilon>
//   num_data_points rows, each holding num_dimensions coordinates
//
// Side effects: fills num_data_points, num_clusters, num_dimensions,
// fuzziness, epsilon, data_point, low_high and degree_of_memb.
//
// Returns 0 on success and -1 if the file cannot be opened. Malformed or
// out-of-range input prints a message and terminates the process with
// exit(1), the same way out-of-limit sizes are handled.
int init(char *fname) {
    FILE *f = fopen(fname, "r");
    if (f == NULL) {
        printf("Failed to open input file.");
        return -1;
    }

    // Every fscanf result is checked: a short or malformed file would
    // otherwise leave the globals holding stale or partial values.
    if (fscanf(f, "%d %d %d", &num_data_points, &num_clusters, &num_dimensions) != 3) {
        printf("Failed to read problem sizes.\n");
        fclose(f);
        exit(1);
    }
    if (num_data_points < 1 || num_clusters < 1 || num_dimensions < 1) {
        printf("Problem sizes must be positive.\n");
        fclose(f);
        exit(1);
    }
    if (num_clusters > MAX_CLUSTER || num_data_points > MAX_DATA_POINTS || num_dimensions > MAX_DATA_DIMENSION) {
        printf("Input data exceeds defined limits.\n");
        fclose(f);
        exit(1);
    }

    if (fscanf(f, "%lf %lf", &fuzziness, &epsilon) != 2) {
        printf("Failed to read fuzziness and epsilon.\n");
        fclose(f);
        exit(1);
    }
    if (fuzziness <= 1.0 || epsilon <= 0.0 || epsilon > 1.0) {
        printf("Invalid fuzziness or epsilon.\n");
        fclose(f);
        exit(1);
    }

    // Start each dimension's range inverted so the first value read
    // replaces both bounds.
    for (int k = 0; k < num_dimensions; k++) {
        low_high[k][0] = DBL_MAX;
        low_high[k][1] = -DBL_MAX;
    }

    // Read the data points and track the per-dimension range.
    for (int i = 0; i < num_data_points; i++) {
        for (int k = 0; k < num_dimensions; k++) {
            if (fscanf(f, "%lf", &data_point[i][k]) != 1) {
                printf("Failed to read data point %d.\n", i);
                fclose(f);
                exit(1);
            }
            if (data_point[i][k] < low_high[k][0])
                low_high[k][0] = data_point[i][k];
            if (data_point[i][k] > low_high[k][1])
                low_high[k][1] = data_point[i][k];
        }
    }
    fclose(f);

    // Random initial memberships: a budget of 100 "percent points" is handed
    // out to clusters 1..num_clusters-1, and cluster 0 takes what is left.
    for (int i = 0; i < num_data_points; i++) {
        int remaining = 100;
        for (int j = 1; j < num_clusters; j++) {
            int share = rand() % (remaining + 1);
            remaining -= share;
            degree_of_memb[i][j] = share / 100.0;
        }
        // Use the exact integer remainder instead of 1.0 - (floating-point
        // sum): the latter can round to a tiny negative value, and raising a
        // negative membership to a non-integer fuzziness yields NaN.
        degree_of_memb[i][0] = remaining / 100.0;
    }
    return 0;
}
// Fill norms[p][c] with the Euclidean distance between data point p and
// cluster centre c, so the membership update can look distances up instead
// of recomputing them for every pair.
void precompute_norms() {
    #pragma omp parallel for collapse(2)
    for (int point = 0; point < num_data_points; point++) {
        for (int centre = 0; centre < num_clusters; centre++) {
            const double *x = data_point[point];
            const double *v = cluster_centre[centre];
            double squared_distance = 0.0;
            int dim = 0;
            while (dim < num_dimensions) {
                const double delta = x[dim] - v[dim];
                squared_distance += delta * delta;
                dim++;
            }
            norms[point][centre] = sqrt(squared_distance);
        }
    }
}
// Recompute every cluster centre as the membership-weighted mean of the data
// points:
//   cluster_centre[j][k] = sum_i u_ij^m * x_ik / sum_i u_ij^m
// where u_ij = degree_of_memb[i][j] and m = fuzziness.
//
// Reads degree_of_memb, data_point and fuzziness; writes cluster_centre.
// Always returns 0.
//
// Not reentrant: the scratch table is static, so this must only be called
// from serial code (the parallel regions are opened inside the function).
int calculate_centre_vectors() {
    // MAX_DATA_POINTS * MAX_CLUSTER doubles is about 8 MB. As an automatic
    // variable that sits right at (or beyond) common default stack limits, so
    // the table is given static storage instead.
    static double t[MAX_DATA_POINTS][MAX_CLUSTER];

    // Raise every membership to the fuzziness exponent once, up front, so the
    // accumulation below does not call pow() once per dimension.
    #pragma omp parallel for collapse(2)
    for (int i = 0; i < num_data_points; i++) {
        for (int j = 0; j < num_clusters; j++) {
            t[i][j] = pow(degree_of_memb[i][j], fuzziness);
        }
    }

    // Each (cluster, dimension) pair is independent and accumulates over the
    // points in a fixed order, so the result does not depend on thread count.
    #pragma omp parallel for collapse(2)
    for (int j = 0; j < num_clusters; j++) {
        for (int k = 0; k < num_dimensions; k++) {
            double numerator = 0.0;
            double denominator = 0.0;
            for (int i = 0; i < num_data_points; i++) {
                numerator += t[i][j] * data_point[i][k];
                denominator += t[i][j];
            }
            // A cluster with no membership weight at all would divide by
            // zero and poison every later distance with NaN; keep its
            // previous centre instead.
            if (denominator > 0.0) {
                cluster_centre[j][k] = numerator / denominator;
            }
        }
    }
    return 0;
}
// Update membership values
double update_degree_of_membership() {
double max_diff = 0.0;
// Precompute norms
precompute_norms();
#pragma omp parallel for reduction(max:max_diff) collapse(2)
for (int i = 0; i < num_data_points; i++) {
for (int j = 0; j < num_clusters; j++) {
double sum = 0.0;
double norm_ij = norms[i][j];
for (int k = 0; k < num_clusters; k++) {
sum += pow(norm_ij / norms[i][k], 2.0 / (fuzziness - 1));
}
double new_uij = 1.0 / sum;
double diff = fabs(new_uij - degree_of_memb[i][j]);
if (diff > max_diff) {
max_diff = diff;
}
degree_of_memb[i][j] = new_uij;
}
}
return max_diff;
}
// Run fuzzy c-means on the data set stored in `fname`.
//
// Loads the input through init(), then alternates centre and membership
// updates until the largest membership change no longer exceeds epsilon.
// Returns 0 on success, or -1 when init() reports a failure.
int fcm(char *fname) {
    if (init(fname) != 0) {
        return -1;
    }

    for (;;) {
        calculate_centre_vectors();
        const double largest_change = update_degree_of_membership();
        // Written as !(a > b) so the loop ends in exactly the same cases as
        // a `while (a > b)` condition would.
        if (!(largest_change > epsilon)) {
            break;
        }
    }
    return 0;
}
// Write the membership matrix, one line per data point, to the file named
// `fname`, or to stdout when `fname` is NULL. Terminates the process with
// exit(1) if the output file cannot be created.
void print_membership_matrix(char *fname) {
    FILE *out = stdout;

    if (fname != NULL) {
        out = fopen(fname, "w");
        if (out == NULL) {
            printf("Cannot create output file.\n");
            exit(1);
        }
    }

    fprintf(out, "Membership matrix Parallel:\n");
    for (int point = 0; point < num_data_points; point++) {
        const double *row = degree_of_memb[point];
        fprintf(out, "Data[%d]: ", point);
        for (int cluster = 0; cluster < num_clusters; cluster++) {
            fprintf(out, "%lf ", row[cluster]);
        }
        fputc('\n', out);
    }

    // stdout is left open; only a file opened here is closed.
    if (fname != NULL) {
        fclose(out);
    }
}
// Program entry point: cluster the data set named on the command line, print
// a short summary, write the membership matrix to "membership.matrix" and
// report the wall-clock time spent in fcm().
//
// Exit status: 0 on success, 1 on a usage error or when clustering fails.
int main(int argc, char **argv) {
    if (argc != 2) {
        printf("USAGE: fcm <input file>\n");
        exit(1);
    }

    double start_time = omp_get_wtime();
    int status = fcm(argv[1]);
    double end_time = omp_get_wtime();

    // fcm() fails only when init() could not open the input, and init() has
    // already reported that. Stop here instead of printing statistics and
    // writing a matrix for data that was never loaded.
    if (status != 0) {
        return 1;
    }

    double execution_time = end_time - start_time;
    printf("Number of data points: %d\n", num_data_points);
    printf("Number of clusters: %d\n", num_clusters);
    printf("Number of data-point dimensions: %d\n", num_dimensions);
    printf("Accuracy margin: %lf\n", epsilon);
    print_membership_matrix("membership.matrix");
    printf("The program took %f seconds.\n", execution_time);
    return 0;
}
''''
我尝试过优化性能,但增加线程数之后,性能反而变得更糟。
您能否提供更多关于您系统的信息,例如 CPU 型号?目前的信息完全不足以判断这种现象是否正常。
如果您想计算该问题的理论加速比,请查阅阿姆达尔定律(Amdahl's law)。