/** General ARMCI global operation (reduction).  Collective on group.
  *
  * The operation name is matched by prefix: '+' and '*' are recognised from
  * the first character only, the remaining names from their leading
  * characters.
  *
  * @param[in]    scope Scope in which to perform the GOP.  SCOPE_ALL and
  *                     SCOPE_MASTERS reduce over the group's communicator;
  *                     any other scope reduces over MPI_COMM_SELF, i.e. the
  *                     calling process only.
  * @param[inout] x     Vector of n elements of the given type; contains the
  *                     input and will contain the output.
  * @param[in]    n     Length of x, in elements
  * @param[in]    op    One of '+', '*', 'max', 'min', 'or', 'absmax', 'absmin'
  * @param[in]    type  Data type of x: ARMCI_INT, ARMCI_LONG, ARMCI_LONG_LONG,
  *                     ARMCI_FLOAT or ARMCI_DOUBLE
  * @param[in]    group Group on which to perform the GOP
  */
void armci_msg_group_gop_scope(int scope, void *x, int n, char *op, int type, ARMCI_Group *group) {
  void        *out;
  MPI_Op       mpi_op;
  MPI_Datatype mpi_type;
  MPI_Comm     comm;
  int          mpi_type_size;
  int          nbytes;
  int          single_proc;  /* non-zero when the reduction involves only this process */

  /* Pick the communicator the reduction actually runs on, and remember
   * whether it contains just the caller. */
  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS) {
    comm        = group->comm;
    single_proc = (group->size == 1);
  } else {
    comm        = MPI_COMM_SELF;
    single_proc = 1;
  }

  /* Translate the ARMCI operation name into an MPI reduction operation. */
  if (op[0] == '+') {
    mpi_op = MPI_SUM;
  } else if (op[0] == '*') {
    mpi_op = MPI_PROD;
  } else if (strncmp(op, "max", 3) == 0) {
    mpi_op = MPI_MAX;
  } else if (strncmp(op, "min", 3) == 0) {
    mpi_op = MPI_MIN;
  } else if (strncmp(op, "or", 2) == 0) {
    mpi_op = MPI_BOR;
  } else if (strncmp(op, "absmax", 6) == 0) {
    mpi_op = MPI_ABSMAX_OP;
  } else if (strncmp(op, "absmin", 6) == 0) {
    mpi_op = MPI_ABSMIN_OP;
  } else {
    ARMCII_Error("unknown operation \'%s\'", op);
    return;  /* only reached if ARMCII_Error returns */
  }

  /* Translate the ARMCI element type into an MPI datatype. */
  switch(type) {
    case ARMCI_INT:
      mpi_type = MPI_INT;
      break;
    case ARMCI_LONG:
      mpi_type = MPI_LONG;
      break;
    case ARMCI_LONG_LONG:
      mpi_type = MPI_LONG_LONG;
      break;
    case ARMCI_FLOAT:
      mpi_type = MPI_FLOAT;
      break;
    case ARMCI_DOUBLE:
      mpi_type = MPI_DOUBLE;
      break;
    default:
      ARMCII_Error("unknown type (%d)", type);
      return;  /* only reached if ARMCII_Error returns */
  }

  // ABS MAX/MIN are unary as well as binary.  We need to also apply abs in the
  // single processor case when reduce would normally just be a no-op.  This
  // covers both a one-member group and a reduction over MPI_COMM_SELF.
  if (single_proc && (mpi_op == MPI_ABSMAX_OP || mpi_op == MPI_ABSMIN_OP)) {
    ARMCII_Absv_op(x, x, &n, &mpi_type);
    return;
  }

  MPI_Type_size(mpi_type, &mpi_type_size);

  ARMCII_Assert(n >= 0);
  nbytes = n*mpi_type_size;

  /* malloc(0) is allowed to return NULL, which must not be mistaken for an
   * allocation failure: always request at least one byte. */
  out = malloc(nbytes > 0 ? (size_t) nbytes : 1);
  ARMCII_Assert(out != NULL);

  /* Reduce into the scratch buffer, then overwrite the caller's vector. */
  MPI_Allreduce(x, out, n, mpi_type, mpi_op, comm);

  ARMCI_Copy(out, x, nbytes);
  free(out);
}
/** General ARMCI global operation (reduction).  Collective on group.
  *
  * The operation name is matched by prefix: '+' and '*' are recognised from
  * the first character only, the remaining names from their leading
  * characters.
  *
  * @param[in]    scope Scope in which to perform the GOP.  SCOPE_ALL and
  *                     SCOPE_MASTERS reduce over the group's communicator;
  *                     any other scope reduces over MPI_COMM_SELF, i.e. the
  *                     calling process only.
  * @param[inout] x     Vector of n data elements, contains input and will
  *                     contain output.
  * @param[in]    n     Length of x, in elements
  * @param[in]    op    One of '+', '*', 'max', 'min', 'or', 'absmax',
  *                     'absmin', 'land' / '&&', 'lor' / '||', 'band' / '&',
  *                     'bor' / '|'
  * @param[in]    type  Data type of x: ARMCI_INT, ARMCI_LONG, ARMCI_LONG_LONG,
  *                     ARMCI_FLOAT or ARMCI_DOUBLE
  * @param[in]    group Group on which to perform the GOP
  */
void armci_msg_group_gop_scope(int scope, void *x, int n, char *op, int type, ARMCI_Group *group) {
  void        *out, **x_buf;
  MPI_Op       mpi_op;
  MPI_Datatype mpi_type;
  MPI_Comm     comm;
  int          mpi_type_size;
  int          nbytes;
  int          single_proc;  /* non-zero when the reduction involves only this process */

  /* Pick the communicator the reduction actually runs on, and remember
   * whether it contains just the caller. */
  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS) {
    comm        = group->comm;
    single_proc = (group->size == 1);
  } else {
    comm        = MPI_COMM_SELF;
    single_proc = 1;
  }

  /* Translate the ARMCI operation name into an MPI reduction operation. */
  if (op[0] == '+') {
    mpi_op = MPI_SUM;
  } else if (op[0] == '*') {
    mpi_op = MPI_PROD;
  } else if (strncmp(op, "max", 3) == 0) {
    mpi_op = MPI_MAX;
  } else if (strncmp(op, "min", 3) == 0) {
    mpi_op = MPI_MIN;
  } else if (strncmp(op, "or", 2) == 0) {
    mpi_op = MPI_BOR;
  } else if (strncmp(op, "absmax", 6) == 0) {
    mpi_op = ARMCI_MPI_ABSMAX_OP;
  } else if (strncmp(op, "absmin", 6) == 0) {
    mpi_op = ARMCI_MPI_ABSMIN_OP;
  /* The following were added to ComEx/ARMCI in 2017. */
  /* https://github.com/GlobalArrays/ga/commit/14ef3cfa4ea3ffa7ee721c2a98685669359f7044 */
  /* && and || need to be tested before & and | to avoid the latter matching the former. */
  } else if ((strncmp(op, "land", 4) == 0) || (strncmp(op, "&&", 2) == 0)) {
    mpi_op = MPI_LAND;
  } else if ((strncmp(op, "lor", 3) == 0) || (strncmp(op, "||", 2) == 0)) {
    mpi_op = MPI_LOR;
  } else if ((strncmp(op, "band", 4) == 0) || (strncmp(op, "&", 1) == 0)) {
    mpi_op = MPI_BAND;
  } else if ((strncmp(op, "bor", 3) == 0) || (strncmp(op, "|", 1) == 0)) {
    mpi_op = MPI_BOR;
  } else {
    ARMCII_Error("unknown operation \'%s\'", op);
    return;  /* only reached if ARMCII_Error returns */
  }

  /* Translate the ARMCI element type into an MPI datatype. */
  switch(type) {
    case ARMCI_INT:
      mpi_type = MPI_INT;
      break;
    case ARMCI_LONG:
      mpi_type = MPI_LONG;
      break;
    case ARMCI_LONG_LONG:
      mpi_type = MPI_LONG_LONG;
      break;
    case ARMCI_FLOAT:
      mpi_type = MPI_FLOAT;
      break;
    case ARMCI_DOUBLE:
      mpi_type = MPI_DOUBLE;
      break;
    default:
      ARMCII_Error("unknown type (%d)", type);
      return;  /* only reached if ARMCII_Error returns */
  }

  MPI_Type_size(mpi_type, &mpi_type_size);
  nbytes = n*mpi_type_size;

  /* All further work goes through x_buf[0] rather than x itself.
   * NOTE(review): presumably the helper hands back a buffer that is safe to
   * pass to MPI (possibly x itself) -- confirm against its definition. */
  ARMCII_Buf_prepare_read_vec(&x, &x_buf, 1, nbytes);

  // ABS MAX/MIN are unary as well as binary.  We need to also apply abs in the
  // single processor case when reduce would normally just be a no-op.  This
  // covers both a one-member group and a reduction over MPI_COMM_SELF.
  if (single_proc && (mpi_op == ARMCI_MPI_ABSMAX_OP || mpi_op == ARMCI_MPI_ABSMIN_OP)) {
    ARMCII_Absv_op(x_buf[0], x_buf[0], &n, &mpi_type);
  } else {
    ARMCII_Assert(n >= 0);

    /* malloc(0) is allowed to return NULL, which must not be mistaken for an
     * allocation failure: always request at least one byte. */
    out = malloc(nbytes > 0 ? (size_t) nbytes : 1);
    ARMCII_Assert(out != NULL);

    /* Reduce into the scratch buffer, then overwrite the working buffer. */
    MPI_Allreduce(x_buf[0], out, n, mpi_type, mpi_op, comm);

    ARMCI_Copy(out, x_buf[0], nbytes);
    free(out);
  }

  /* Counterpart of the prepare call above; run on both paths so the result
   * in x_buf[0] is handed back to x. */
  ARMCII_Buf_finish_write_vec(&x, x_buf, 1, nbytes);
}