This is my original sequential code for heat diffusion on a heatsink; I now need to parallelize it with MPI by splitting the domain in 2D across the processors:
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define DUMP_STEADY_STATE

/* wallclock_time() is defined elsewhere in my project and is not shown here */
const double L = 0.15; /* length (x) of the heatsink (m) */
const double l = 0.12; /* height (y) of the heatsink (m) */
const double E = 0.008; /* width (z) of the heatsink (m) */
const double watercooling_T = 20; /* temperature of the fluid for water-cooling, (°C) */
const double CPU_TDP = 280; /* power dissipated by the CPU (W) */
/* dl: "spatial step" for simulation (m) */
/* dt: "time step" for simulation (s) */
double dl = 0.004;
double dt = 0.004;
/* sink_heat_capacity: specific heat capacity of the heatsink (J / kg / K) */
/* sink_density: density of the heatsink (kg / m^3) */
/* sink_conductivity: thermal conductivity of the heatsink (W / m / K) */
/* euros_per_kg: price of the matter by kilogram */
double sink_heat_capacity = 897;
double sink_density = 2710;
double sink_conductivity = 237;
double euros_per_kg = 1.594;
const double Stefan_Boltzmann = 5.6703e-8; /* (W / m^2 / K^4), radiation of black body */
const double heat_transfer_coefficient = 10; /* coefficient of thermal convection (W / m^2 / K) */
double CPU_surface;
/*
* Return True if the CPU is in contact with the heatsink at the point (x,y).
* This describes an AMD EPYC "Rome".
*/
static inline bool CPU_shape(double x, double y)
{
x -= (L - 0.0754) / 2;
y -= (l - 0.0585) / 2;
bool small_y_ok = (y > 0.015 && y < 0.025) || (y > 0.0337 && y < 0.0437);
bool small_x_ok = (x > 0.0113 && x < 0.0186) || (x > 0.0193 && x < 0.0266)
|| (x > 0.0485 && x < 0.0558) || (x > 0.0566 && x < 0.0639);
bool big_ok = (x > 0.03 && x < 0.045 && y > 0.0155 && y < 0.0435);
return big_ok || (small_x_ok && small_y_ok);
}
/* returns the total area of the surface of contact between CPU and heatsink (in m^2) */
double CPU_contact_surface()
{
double S = 0;
for (double x = dl / 2; x < L; x += dl)
for (double y = dl / 2; y < l; y += dl)
if (CPU_shape(x, y))
S += dl * dl;
return S;
}
/* Returns the new temperature of the cell (i, j, k). For this, there is an access to neighbor
* cells (left, right, top, bottom, front, back), except if (i, j, k) is on the external surface. */
static inline double update_temperature(const double *T, int u, int n, int m, int o, int i, int j, int k)
{
/* quantity of thermal energy that must be brought to a cell to make it heat up by 1°C */
const double cell_heat_capacity = sink_heat_capacity * sink_density * dl * dl * dl; /* J / K */
const double dl2 = dl * dl;
double thermal_flux = 0;
if (i > 0)
thermal_flux += (T[u - 1] - T[u]) * sink_conductivity * dl; /* neighbor x-1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (i < n - 1)
thermal_flux += (T[u + 1] - T[u]) * sink_conductivity * dl; /* neighbor x+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (j > 0)
thermal_flux += (T[u - n] - T[u]) * sink_conductivity * dl; /* neighbor y-1 */
else {
/* Bottom cell: does it receive heat from the CPU? */
if (CPU_shape(i * dl, k * dl))
thermal_flux += CPU_TDP / CPU_surface * dl2;
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
}
if (j < m - 1)
thermal_flux += (T[u + n] - T[u]) * sink_conductivity * dl; /* neighbor y+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (k > 0)
thermal_flux += (T[u - n * m] - T[u]) * sink_conductivity * dl; /* neighbor z-1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (k < o - 1)
thermal_flux += (T[u + n * m] - T[u]) * sink_conductivity * dl; /* neighbor z+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
/* adjust temperature depending on the heat flux */
return T[u] + thermal_flux * dt / cell_heat_capacity;
}
/* Run the simulation on the k-th xy plane.
* v is the index of the start of the k-th xy plane in the arrays T and R. */
static inline void do_xy_plane(const double *T, double *R, int v, int n, int m, int o, int k)
{
if (k == 0)
// we do not modify the z = 0 plane: it is maintained at constant temperature via water-cooling
return;
for (int j = 0; j < m; j++) { // y
for (int i = 0; i < n; i++) { // x
int u = v + j * n + i;
R[u] = update_temperature(T, u, n, m, o, i, j, k);
}
}
}
int main()
{
double start = wallclock_time();
CPU_surface = CPU_contact_surface();
double V = L * l * E;
int n = ceil(L / dl);
int m = ceil(E / dl);
int o = ceil(l / dl);
/* temperature of each cell, in degree Kelvin. */
double *T = malloc(n * m * o * sizeof(*T));
double *R = malloc(n * m * o * sizeof(*R));
if (T == NULL || R == NULL) {
perror("T or R could not be allocated");
exit(1);
}
/* initially the heatsink is at the temperature of the water-cooling fluid */
for (int u = 0; u < n * m * o; u++)
R[u] = T[u] = watercooling_T + 273.15;
/* let's go! we switch the CPU on and launch the simulation until it reaches a stationary state. */
double t = 0;
int n_steps = 0;
int convergence = 0;
/* simulating time steps */
while (convergence == 0) {
/* Update all cells. xy planes are processed, for increasing values of z. */
for (int k = 0; k < o; k++) { // z
int v = k * n * m;
do_xy_plane(T, R, v, n, m, o, k);
}
/* each second, we test the convergence, and print a short progress report */
if (n_steps % ((int)(1 / dt)) == 0) {
double delta_T = 0;
double max = -INFINITY;
for (int u = 0; u < n * m * o; u++) {
delta_T += (R[u] - T[u]) * (R[u] - T[u]);
if (R[u] > max)
max = R[u];
}
delta_T = sqrt(delta_T) / dt;
fprintf(stderr, "t = %.1fs ; T_max = %.1f°C ; convergence = %g\n", t, max - 273.15, delta_T);
if (delta_T < 0.1)
convergence = 1;
}
/* the new temperatures are in R */
double *tmp = R;
R = T;
T = tmp;
t += dt;
n_steps += 1;
}
#ifdef DUMP_STEADY_STATE
printf("###### STEADY STATE; t = %.1f\n", t);
for (int k = 0; k < o; k++) { // z
printf("# z = %g\n", k * dl);
for (int j = 0; j < m; j++) { // y
for (int i = 0; i < n; i++) { // x
printf("%.1f ", T[k * n * m + j * n + i] - 273.15);
}
printf("\n");
}
}
double end = wallclock_time();
fprintf(stderr, "Total computing time: %g sec\n", end - start);
printf("\n");
fprintf(stderr, "For graphical rendering: python3 rendu_picture_steady.py [filename.txt] %d %d %d\n", n, m, o);
#endif
exit(EXIT_SUCCESS);
}
Now I am trying to do the 2D heat diffusion with Open MPI.
I started writing the code, but I run into a problem when I receive the data and then use it to compute the new temperatures with MPI_Sendrecv: if my subdomain is in the top-left corner of the process grid, I need data from the processor right next to me, and I don't see how to compute a temperature that depends on the two received halos at the same time. Maybe I am not going about it the right way.
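Before my attempt, here is the neighbor bookkeeping I think I need, as a minimal sketch using MPI_Cart_create / MPI_Cart_shift (my code below computes the ranks by hand instead; dims, coords, left, right, down, up are names I made up for illustration):

#include <mpi.h>
#include <stdio.h>
int main(int argc, char **argv)
{
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* let MPI factor the process count into a px * pz grid,
 * instead of the hand-rolled divisor search I do below */
int dims[2] = {0, 0}, periods[2] = {0, 0}, coords[2];
MPI_Dims_create(size, 2, dims);
MPI_Comm cart;
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart);
MPI_Cart_coords(cart, rank, 2, coords);
/* neighbor ranks; MPI_PROC_NULL on the physical boundary, so the same
 * communication call stays valid for every rank, corners included */
int left, right, down, up;
MPI_Cart_shift(cart, 0, 1, &left, &right); /* along x */
MPI_Cart_shift(cart, 1, 1, &down, &up); /* along z */
printf("rank %d -> (%d,%d) left=%d right=%d down=%d up=%d\n",
rank, coords[0], coords[1], left, right, down, up);
MPI_Finalize();
return 0;
}

And here is my actual attempt, where the problem appears: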
#include <mpi.h>

int main()
{
MPI_Init(NULL, NULL); // Initialize the MPI environment
// same initialisation as in the sequential code (n, m, o, dl, T, R, ...)
double start = wallclock_time();
int rank, size;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int z_domain = o / size; // number of z planes for each processor
int rest_line_z = o % size;
int start_z = 0;
int end_z = 0;
int x_domain = n / size; // number of x columns for each processor
int rest_line_x = n % size;
int start_x = 0;
int end_x = 0;
// number of processor on each axis (x et z).
int num_processors = size;
int x_processors = 1; // Initialize x_processors
int z_processors = num_processors; // initialized in case the loop below finds no 2D split
while (x_processors < num_processors) {
if (num_processors % x_processors == 0) {
z_processors = num_processors / x_processors;
if (x_processors >= 2 && z_processors >= 2) {
break;
}
}
x_processors++;
}
// Distribution of the local_domain to each processor
int x_count = 0;
int z_count = 0;
for (int i = 0; i < size; i++) {
if (x_count == x_processors) {
x_count = 0;
z_count++;
}
if (i == rank) {
start_x = x_count * x_domain;
start_z = z_count * z_domain;
}
x_count++;
}
if (rank == size - 1) { // the remaining z lines go to the last processor
end_z = start_z + z_domain + rest_line_z;
}
else {
end_z = start_z + z_domain;
}
if ((rank + 1) % x_processors == 0) { // the remaining x lines go to the last processor of each row
end_x = start_x + x_domain + rest_line_x;
}
else {
end_x = start_x + x_domain;
}
double t = 0;
int n_steps = 0;
int convergence = 0;
while (convergence == 0) { // NOTE: the convergence flag is never updated yet in this version
// each processor sweeps its own part of the x and z axes
for (int k = start_z; k <= end_z; k++) {
for (int i = start_x; i <= end_x; i++) {
int v = k * n * m + i * m; // v = index of the first cell of this (i, k) column
double *halo = malloc(n * m * sizeof(*halo)); // NOTE: allocated at every iteration, never freed
if (rank > x_processors && k == start_z) {
MPI_Sendrecv(&T[v], n * m, MPI_DOUBLE, rank - x_processors, 0, halo, n * m, MPI_DOUBLE, rank, 1, MPI_COMM_WORLD, &status);
}
if (rank + x_processors < size - 1 && k == end_z) {
MPI_Sendrecv(&T[v], n * m, MPI_DOUBLE, rank + x_processors, 1, halo, n * m, MPI_DOUBLE, rank, 0, MPI_COMM_WORLD, &status);
}
if (rank > 0 && i == start_x) {
double *haloright = malloc(m * z_domain * sizeof(*haloright));
if ((rank > x_processors && k == start_z) || (rank + x_processors < size - 1 && k == end_z)) {
MPI_Sendrecv(&T[v], z_domain * m, MPI_DOUBLE, rank - 1, 2, haloright, z_domain * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
}
else {
MPI_Sendrecv(&T[v], z_domain * m, MPI_DOUBLE, rank - 1, 2, haloright, z_domain * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(haloright, u, x_domain, m, z_domain, i, j, k);
}
}
if (rank < size - 1 && (rank + 1) % x_processors != 0 && i == end_x) {
double *haloleft = malloc(m * z_domain * sizeof(*haloleft));
if ((rank > x_processors && k == start_z) || (rank + x_processors < size - 1 && k == end_z)) {
MPI_Sendrecv(&T[v], z_domain * m, MPI_DOUBLE, rank + 1, 3, haloleft, z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
}
else {
MPI_Sendrecv(&T[v], z_domain * m, MPI_DOUBLE, rank + 1, 3, haloleft, z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(haloleft, u, x_domain, m, z_domain, i, j, k);
}
}
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(halo, u, x_domain, m, z_domain, i, j, k);
}
}
}
double *tmp = R;
R = T;
T = tmp;
t += dt;
n_steps += 1;
}
#ifdef DUMP_STEADY_STATE
// same code as in the sequential version
#endif
// Finalize the MPI environment
MPI_Finalize();
return 0;
}
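To make my question concrete: is the right pattern to exchange all four halos first, once per time step, and only then update the cells? Here is a minimal sketch of what I mean. Everything in it is an assumption for illustration, not code from my attempt: Tloc is a local block of (ln+2) x m x (lo+2) cells with one ghost layer in x and z, indexed u = k*(ln+2)*m + j*(ln+2) + i, and left/right/down/up are the neighbor ranks from the first sketch (MPI_PROC_NULL on the domain boundary, which turns the corresponding MPI_Sendrecv into a no-op):

#include <mpi.h>
/* Fill the ghost layers of Tloc from the four neighbors.
 * Meant to be called once per time step, before any cell update. */
static void exchange_halos(double *Tloc, int ln, int m, int lo,
int left, int right, int down, int up, MPI_Comm cart)
{
int nx = ln + 2; /* padded x extent */
int plane = nx * m; /* doubles in one xy plane */
/* z faces are contiguous: one whole xy plane per neighbor */
MPI_Sendrecv(&Tloc[1 * plane], plane, MPI_DOUBLE, down, 0,
&Tloc[(lo + 1) * plane], plane, MPI_DOUBLE, up, 0, cart, MPI_STATUS_IGNORE);
MPI_Sendrecv(&Tloc[lo * plane], plane, MPI_DOUBLE, up, 1,
&Tloc[0], plane, MPI_DOUBLE, down, 1, cart, MPI_STATUS_IGNORE);
/* x faces are strided: one double per (j, k) pair with stride nx; this
 * works because consecutive xy planes are contiguous in memory */
MPI_Datatype xface;
MPI_Type_vector(m * lo, 1, nx, MPI_DOUBLE, &xface);
MPI_Type_commit(&xface);
MPI_Sendrecv(&Tloc[plane + 1], 1, xface, left, 2,
&Tloc[plane + ln + 1], 1, xface, right, 2, cart, MPI_STATUS_IGNORE);
MPI_Sendrecv(&Tloc[plane + ln], 1, xface, right, 3,
&Tloc[plane], 1, xface, left, 3, cart, MPI_STATUS_IGNORE);
MPI_Type_free(&xface);
}

With the ghosts filled like this, the update loop could read T[u-1], T[u+1], T[u-nx], T[u+nx], T[u-plane], T[u+plane] uniformly for every interior cell, with no special case at the subdomain corners, and no rank would receive from itself with mismatched tags the way my MPI_Sendrecv calls above seem to. Is this the right direction?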
Can someone help me?
Did you find a solution?