int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource)
{
	unsigned long ret;
	u64 *rblock;
	unsigned long block_count;

	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
	if (!rblock) {
		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
		ret = -ENOMEM;
		goto error_data1;
	}

	/* rblock must be 4K aligned and should be 4K large */
	ret = hipz_h_error_data(shca->ipz_hca_handle,
				resource,
				rblock,
				&block_count);

	if (ret == H_R_STATE)
		ehca_err(&shca->ib_device,
			 "No error data is available: %llx.", resource);
	else if (ret == H_SUCCESS) {
		int length;

		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

		if (length > EHCA_PAGESIZE)
			length = EHCA_PAGESIZE;

		print_error_data(shca, data, rblock, length);
	} else
		ehca_err(&shca->ib_device,
			 "Error data could not be fetched: %llx", resource);

	ehca_free_fw_ctrlblock(rblock);

error_data1:
	return ret;
}
void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle)
{
	unsigned long ret;
	u64 *rblock;

	rblock = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!rblock) {
		ehea_error("Cannot allocate rblock memory.");
		return;
	}

	ret = ehea_h_error_data(adapter->handle, res_handle, rblock);

	if (ret == H_R_STATE)
		ehea_error("No error data is available: %lX.", res_handle);
	else if (ret == H_SUCCESS)
		print_error_data(rblock);
	else
		ehea_error("Error data could not be fetched: %lX", res_handle);

	kfree(rblock);
}
int main(int argc, char *argv[])
{
    int procid, num_procs;
    MPI_Status status;

    // derivative_time, integral_time, err_time hold the local sum of runtime for each computation
    // tick is used to mark time
    double derivative_time = 0, integral_time = 0, err_time = 0, tick;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

    // Calculate grid-points per process
    if (NGRID % num_procs > 0)
    {
        if (procid == 0)
            printf("NGRID should be divisible by the number of processes!\n");
        MPI_Finalize();
        return 1;
    }
    int points_per_node = NGRID / num_procs;

    //loop index
    int i;

    //domain array and step size
    FP_PREC xc[points_per_node], dx;

    //function array and derivative
    //the size depends on the number of processes used by the program
    FP_PREC yc[points_per_node], dyc[points_per_node];

    //integration values
    FP_PREC local_intg, intg;

    //error analysis array
    FP_PREC derr[points_per_node];

    //error analysis values
    FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err;

    //calculate dx
    dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1);

    // get start X for each process (my_XI)
    int bins_before_me = procid * points_per_node;
    FP_PREC my_XI = XI + bins_before_me * dx;

    //construct grid
    for (i = 0; i < points_per_node; ++i)
    {
        xc[i] = my_XI + i * dx;
    }

    //define the function
    for (i = 0; i < points_per_node; ++i)
    {
        yc[i] = fn(xc[i]);
    }

    //define holders for left and right bound value
    FP_PREC left_bound_yc, right_bound_yc;
    if (procid == 0)
        left_bound_yc = fn(XI - dx);
    if (procid == num_procs - 1)
        right_bound_yc = fn(XF + dx);

    tick = MPI_Wtime();
#if BLOCKING
    if (procid == 0)
        printf("Using blocking message!\n");

    //Step 1: even nodes send to the right then receive back
    //Step 2: even nodes receive from the left then send back
    if (procid % 2 == 0)
    {
        if (procid < num_procs - 1)
        {
            MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
            MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
        }
        if (procid > 0)
        {
            MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
            MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
        }
    }
    else
    {
        MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
        MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
        if (procid < num_procs - 1)
        {
            MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
            MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
        }
    }
#else
    if (procid == 0)
        printf("Using non-blocking message!\n");

    MPI_Request request[4];
    int current_request = 0;
    if (procid < num_procs - 1)
    {
        // receive right bound yc
        MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
        ++current_request;
    }
    if (procid > 0)
    {
        // receive left bound yc
        MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
        ++current_request;
    }
    if (procid < num_procs - 1)
    {
        // send right bound yc to right node
        MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
        ++current_request;
    }
    if (procid > 0)
    {
        // send left bound yc to left node
        MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
        ++current_request;
    }
#endif
    derivative_time += MPI_Wtime() - tick;
    integral_time += MPI_Wtime() - tick;

    // Overlap computation and communication BEGIN

    //compute the derivative using first-order finite differencing
    tick = MPI_Wtime();
    for (i = 1; i < points_per_node-1; ++i)
    {
        dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx);
    }
    derivative_time += MPI_Wtime() - tick;

    //compute the integral using the Trapezoidal rule
    tick = MPI_Wtime();
    local_intg = 0.0;
    for (i = 0; i < points_per_node-1; ++i)
    {
        local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx;
    }
    integral_time += MPI_Wtime() - tick;

    // Overlap computation and communication END

    // WAIT for non-blocking messages to complete before continuing
#if !BLOCKING
    tick = MPI_Wtime();
    MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE);
    derivative_time += MPI_Wtime() - tick;
    integral_time += MPI_Wtime() - tick;
#endif

    // compute derivative of boundary points; runtime is not counted because it's quite small
    dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx);
    dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx);

    // compute integral at right boundary point; runtime is not counted because it's quite small
    if (procid < num_procs-1)
        local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx;

    tick = MPI_Wtime();
    //compute the error, average error of the derivatives
    for (i = 0; i < points_per_node; ++i)
    {
        if (dfn(xc[i]) == 0)
        {
            printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid);
            derr[i] = 0;
        }
        else
            derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i]));
    }

    //find the local average error
    dlocal_sum_err = 0.0;
    for (i = 0; i < points_per_node; ++i)
    {
        dlocal_sum_err += derr[i];
    }

    //calculate and output errors
#if SINGLE_CALL_REDUCTION
    if (procid == 0)
        printf("Using single call reduction!\n");

    //all nodes collect the error sum and convert it to the mean value
    MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    davg_err /= (FP_PREC)NGRID;  // each process calculates the global average
#else
    if (procid == 0)
        printf("Using manual call reduction!\n");

    //all nodes collect the error sum and convert it to the mean value
    if (procid != 0)
        MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    else if (procid == 0)
    {
        davg_err = dlocal_sum_err;
        for (i = 1; i < num_procs; ++i)
        {
            MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            davg_err += dlocal_sum_err;
        }
        davg_err /= (FP_PREC)NGRID;
    }
    MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif

    //now all nodes have davg_err; find the sum of squared differences of local derr
    dlocal_std_dev = 0.0;
    for (i = 0; i < points_per_node; ++i)
    {
        dlocal_std_dev += pow(derr[i] - davg_err, 2);
    }
    err_time += MPI_Wtime() - tick;

#if SINGLE_CALL_REDUCTION
    //reduce local integral & local (sum of squared differences of derr) to root
    tick = MPI_Wtime();
    MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    err_time += MPI_Wtime() - tick;

    tick = MPI_Wtime();
    MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    integral_time += MPI_Wtime() - tick;
#else
    //reduce local integral & local (sum of squared differences of derr) to root
    if (procid != 0)
    {
        tick = MPI_Wtime();
        MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
        err_time += MPI_Wtime() - tick;

        tick = MPI_Wtime();
        MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
        integral_time += MPI_Wtime() - tick;
    }
    else if (procid == 0)
    {
        dstd_dev = dlocal_std_dev;
        intg = local_intg;

        tick = MPI_Wtime();
        for (i = 1; i < num_procs; ++i)
        {
            MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            dstd_dev += dlocal_std_dev;
        }
        err_time += MPI_Wtime() - tick;

        tick = MPI_Wtime();
        for (i = 1; i < num_procs; ++i)
        {
            MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
            intg += local_intg;
        }
        integral_time += MPI_Wtime() - tick;
    }
#endif

    // print out the max runtime for each calculation
    double max_derivative_time, max_integral_time, max_err_time;
    MPI_Reduce(&derivative_time, &max_derivative_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (procid == 0)
    {
        printf("Max runtime to calculate derivatives is %e\n", max_derivative_time);
        printf("Max runtime to calculate integral is %e\n", max_integral_time);
        printf("Max runtime to calculate derivative errors is %e\n", max_err_time);
    }

    //gather derivative results & errors for output
    //this part shouldn't be included in running time measurements
    FP_PREC *final_dyc = NULL;
    FP_PREC *final_derr = NULL;
    if (procid == 0)
    {
        final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
        final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
    }
    MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    //final output at root node (rank 0)
    if (procid == 0)
    {
        dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID);
        if (ifn(XI, XF) == 0)
        {
            printf("WARNING: true integral value from XI to XF is equal to zero.\n");
            intg_err = 0;
        }
        else
        {
            intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF));
        }
        print_function_data(NGRID, dx, final_dyc);
        print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr);
        free(final_dyc);
        free(final_derr);
    }

    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int taskId, totaltasks, i, j;
    int chunk;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskId);
    MPI_Comm_size(MPI_COMM_WORLD, &totaltasks);

    chunk = NGRID / totaltasks;

    FP_PREC xc[chunk + 2];
    FP_PREC yc[chunk + 2];
    FP_PREC dyc[chunk + 2];
    FP_PREC derr[chunk + 2];
    FP_PREC intg;
    FP_PREC dx;

    int prev_task = (taskId - 1) < 0 ? totaltasks - 1 : taskId - 1;
    int next_task = (taskId + 1) % totaltasks;

    MPI_Request reqs[4];
    MPI_Status stats[4];

    //post receives for the ghost points from the neighbouring tasks
    MPI_Irecv(&yc[0], 1, MPI_DOUBLE, prev_task, prev_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[0]);
    MPI_Irecv(&yc[chunk + 1], 1, MPI_DOUBLE, next_task, next_task * 1000 + taskId, MPI_COMM_WORLD, &reqs[1]);

    //construct the grid for this task's chunk (global index = taskId * chunk + i - 1)
    for (i = 1; i <= chunk + 1; i++)
    {
        xc[i] = XI + (XF - XI) * (FP_PREC)(taskId * chunk + i - 1) / (FP_PREC)(NGRID - 1);
    }

    //define the function
    for (i = 1; i <= chunk; i++)
    {
        yc[i] = fn(xc[i]);
    }

    //send the boundary values to the neighbouring tasks
    MPI_Isend(&yc[chunk], 1, MPI_DOUBLE, next_task, taskId * 1000 + next_task, MPI_COMM_WORLD, &reqs[3]);
    MPI_Isend(&yc[1], 1, MPI_DOUBLE, prev_task, taskId * 1000 + prev_task, MPI_COMM_WORLD, &reqs[2]);
    MPI_Waitall(4, reqs, stats);

    dx = xc[2] - xc[1];
    //the first and last tasks use ghost points outside the domain instead of the wrap-around values
    if (taskId == ROOT)
    {
        xc[0] = xc[1] - dx;
        yc[0] = fn(xc[0]);
    }
    if (taskId == totaltasks - 1)
    {
        xc[chunk + 1] = xc[chunk] + dx;
        yc[chunk + 1] = fn(xc[chunk + 1]);
    }

    //compute the derivative using first-order finite differencing
    for (i = 1; i <= chunk; i++)
    {
        dyc[i] = (yc[i + 1] - yc[i - 1]) / (2.0 * dx);
    }

    //compute the integral using the Trapezoidal rule
    intg = 0.0;
    for (i = 1; i <= chunk; i++)
    {
        if (taskId == totaltasks - 1 && i == chunk)
            continue;
        intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]);
    }

    //compute the errors
    for (i = 1; i <= chunk; i++)
    {
        derr[i] = fabs((dyc[i] - dfn(xc[i])) / dfn(xc[i]));
    }

    if (taskId != ROOT)
    {
        MPI_Request nreqs[2];
        MPI_Status nstats[2];
        MPI_Isend(derr + 1, chunk, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[0]);
        MPI_Isend(&intg, 1, MPI_DOUBLE, ROOT, taskId * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[1]);
        MPI_Waitall(2, nreqs, nstats);
    }
    else
    {
        FP_PREC allxc[NGRID];
        FP_PREC allderr[NGRID];
        FP_PREC allintg[totaltasks];
        FP_PREC davg_err = 0.0;
        FP_PREC dstd_dev = 0.0;
        FP_PREC intg_err = 0.0;

        MPI_Request nreqs[2 * (totaltasks - 1)];
        MPI_Status nstats[2 * (totaltasks - 1)];
        for (i = 1; i < totaltasks; i++)
        {
            MPI_Irecv(allderr + (i * chunk), chunk, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1)]);
            MPI_Irecv(allintg + i, 1, MPI_DOUBLE, i, i * 1000 + ROOT, MPI_COMM_WORLD, &nreqs[2 * (i - 1) + 1]);
        }
        for (i = 0; i < chunk; i++)
        {
            allderr[i] = derr[i + 1];
        }
        MPI_Waitall(2 * (totaltasks - 1), nreqs, nstats);

        //find the average error
        for (i = 0; i < NGRID; i++)
            davg_err += allderr[i];

        for (i = 1; i < totaltasks; i++)
        {
            intg += allintg[i];
        }

        davg_err /= (FP_PREC)NGRID;

        dstd_dev = 0.0;
        for (i = 0; i < NGRID; i++)
        {
            dstd_dev += pow(allderr[i] - davg_err, 2);
        }
        dstd_dev = sqrt(dstd_dev / (FP_PREC)NGRID);

        intg_err = fabs((ifn(XI, XF) - intg) / ifn(XI, XF));

        for (i = 0; i < NGRID; i++)
        {
            allxc[i] = XI + (XF - XI) * (FP_PREC)i / (FP_PREC)(NGRID - 1);
        }

        //print_error_data(NGRID, davg_err, dstd_dev, &xc[1], derr, intg_err);
        print_error_data(NGRID, davg_err, dstd_dev, allxc, allderr, intg_err);
    }

    MPI_Finalize();
    return 0;
}